YuWang0103 committed on
Commit 6b59850 · verified · 1 Parent(s): 613b01f

Upload 41 files

Files changed (41)
  1. analysis/.DS_Store +0 -0
  2. analysis/__init__.py +0 -0
  3. analysis/__pycache__/__init__.cpython-39 (nds4's conflicted copy 2024-05-30).pyc +0 -0
  4. analysis/__pycache__/__init__.cpython-39.pyc +0 -0
  5. analysis/__pycache__/dist_helper.cpython-39.pyc +0 -0
  6. analysis/__pycache__/spectre_utils.cpython-39 (nds4's conflicted copy 2024-05-30).pyc +0 -0
  7. analysis/__pycache__/spectre_utils.cpython-39.pyc +0 -0
  8. analysis/dist_helper.py +156 -0
  9. analysis/orca/orca +0 -0
  10. analysis/orca/orca.cpp +1532 -0
  11. analysis/orca/orca.h +1488 -0
  12. analysis/orca/tmp_JJOX0U87.txt +25 -0
  13. analysis/orca/tmp_YX4O2JRL.txt +3269 -0
  14. analysis/rdkit_functions.py +334 -0
  15. analysis/spectre_utils.py +928 -0
  16. analysis/visualization.py +221 -0
  17. app.py +89 -0
  18. config.yaml +53 -0
  19. dataset.py +395 -0
  20. demo_model.py +214 -0
  21. diffusion/__init__.py +0 -0
  22. diffusion/__pycache__/__init__.cpython-39.pyc +0 -0
  23. diffusion/__pycache__/diffusion_utils.cpython-39.pyc +0 -0
  24. diffusion/__pycache__/noise_schedule.cpython-39.pyc +0 -0
  25. diffusion/diffusion_utils.py +437 -0
  26. diffusion/distributions.py +32 -0
  27. diffusion/extra_features.py +275 -0
  28. diffusion/extra_features_molecular.py +57 -0
  29. diffusion/layers.py +19 -0
  30. diffusion/noise_schedule.py +225 -0
  31. diffusion/utils.py +137 -0
  32. distributions.py +37 -0
  33. extra_features.py +275 -0
  34. models/__init__.py +0 -0
  35. models/__pycache__/__init__.cpython-39.pyc +0 -0
  36. models/__pycache__/layers.cpython-39.pyc +0 -0
  37. models/__pycache__/transformer_model.cpython-39.pyc +0 -0
  38. models/layers.py +46 -0
  39. models/transformer_model.py +285 -0
  40. requirements.txt +15 -0
  41. utils.py +137 -0
analysis/.DS_Store ADDED
Binary file (6.15 kB).

analysis/__init__.py ADDED
File without changes

analysis/__pycache__/__init__.cpython-39 (nds4's conflicted copy 2024-05-30).pyc ADDED
Binary file (149 Bytes).

analysis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (165 Bytes).

analysis/__pycache__/dist_helper.cpython-39.pyc ADDED
Binary file (4.57 kB).

analysis/__pycache__/spectre_utils.cpython-39 (nds4's conflicted copy 2024-05-30).pyc ADDED
Binary file (23.5 kB).

analysis/__pycache__/spectre_utils.cpython-39.pyc ADDED
Binary file (23.5 kB).
analysis/dist_helper.py ADDED
@@ -0,0 +1,156 @@
+ ###############################################################################
+ #
+ # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation
+ #
+ ###############################################################################
+ import pyemd
+ import numpy as np
+ import concurrent.futures
+ from functools import partial
+ from scipy.linalg import toeplitz
+
+
+ def emd(x, y, distance_scaling=1.0):
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     emd = pyemd.emd(x, y, distance_mat)
+     return emd
+
+
+
+ def l2(x, y):
+     dist = np.linalg.norm(x - y, 2)
+     return dist
+
+
+ def emd(x, y, sigma=1.0, distance_scaling=1.0):
+     ''' EMD
+     Args:
+         x, y: 1D pmf of two distributions with the same support
+         sigma: standard deviation
+     '''
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     return np.abs(pyemd.emd(x, y, distance_mat))
+
+
+ def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0):
+     ''' Gaussian kernel with squared distance in exponential term replaced by EMD
+     Args:
+         x, y: 1D pmf of two distributions with the same support
+         sigma: standard deviation
+     '''
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     emd = pyemd.emd(x, y, distance_mat)
+     return np.exp(-emd * emd / (2 * sigma * sigma))
+
+
+ def gaussian(x, y, sigma=1.0):
+     support_size = max(len(x), len(y))
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     dist = np.linalg.norm(x - y, 2)
+     return np.exp(-dist * dist / (2 * sigma * sigma))
+
+
+ def gaussian_tv(x, y, sigma=1.0):
+     support_size = max(len(x), len(y))
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     dist = np.abs(x - y).sum() / 2.0
+     return np.exp(-dist * dist / (2 * sigma * sigma))
+
+
+ def kernel_parallel_unpacked(x, samples2, kernel):
+     d = 0
+     for s2 in samples2:
+         d += kernel(x, s2)
+     return d
+
+
+ def kernel_parallel_worker(t):
+     return kernel_parallel_unpacked(*t)
+
+
+ def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs):
+     ''' Discrepancy between 2 samples '''
+     d = 0
+
+     if not is_parallel:
+         for s1 in samples1:
+             for s2 in samples2:
+                 d += kernel(s1, s2, *args, **kwargs)
+     else:
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             for dist in executor.map(kernel_parallel_worker, [
+                     (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1
+             ]):
+                 d += dist
+     if len(samples1) * len(samples2) > 0:
+         d /= len(samples1) * len(samples2)
+     else:
+         d = 1e+6
+     return d
+
+
+ def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
+     ''' MMD between two samples '''
+     # normalize histograms into pmf
+     if is_hist:
+         samples1 = [s1 / (np.sum(s1) + 1e-6) for s1 in samples1]
+         samples2 = [s2 / (np.sum(s2) + 1e-6) for s2 in samples2]
+     return disc(samples1, samples1, kernel, *args, **kwargs) + disc(samples2, samples2, kernel, *args, **kwargs) - \
+         2 * disc(samples1, samples2, kernel, *args, **kwargs)
+
+
+ def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
+     ''' EMD between average of two samples '''
+     # normalize histograms into pmf
+     if is_hist:
+         samples1 = [np.mean(samples1)]
+         samples2 = [np.mean(samples2)]
+     return disc(samples1, samples2, kernel, *args,
+                 **kwargs), [samples1[0], samples2[0]]
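For context, compute_mmd takes two lists of 1-D histograms and one of the kernels defined above, normalizes each histogram into a pmf (is_hist=True), and returns the squared MMD. A minimal usage sketch follows; it is illustrative only — the example histograms are made up, the repo-root import path is an assumption, and it presumes the dependencies in requirements.txt (e.g. pyemd, scipy) are installed:

import numpy as np
from analysis.dist_helper import compute_mmd, gaussian_tv

# Hypothetical degree histograms for a reference set and a generated set of graphs.
ref_hists = [np.array([1, 3, 2, 0]), np.array([2, 2, 1, 1])]
gen_hists = [np.array([0, 4, 1, 1]), np.array([1, 2, 2, 1])]

# sigma is forwarded through disc() to the total-variation Gaussian kernel.
score = compute_mmd(ref_hists, gen_hists, kernel=gaussian_tv, is_hist=True, sigma=1.0)
print(score)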
analysis/orca/orca ADDED
Binary file (95.5 kB).
 
analysis/orca/orca.cpp ADDED
@@ -0,0 +1,1532 @@
1
+ #include <cstdio>
2
+ #include <cstdlib>
3
+ #include <cstring>
4
+ #include <cassert>
5
+ #include <ctime>
6
+ #include <iostream>
7
+ #include <fstream>
8
+ #include <set>
9
+ #include <sstream>
10
+ #include <unordered_map>
11
+ #include <algorithm>
12
+
13
+ using namespace std;
14
+
15
+ typedef long long int64;
16
+ typedef pair<int,int> PII;
17
+ typedef struct { int first, second, third; } TIII;
18
+
19
+ struct PAIR {
20
+ int a, b;
21
+ PAIR(int a0, int b0) { a=min(a0,b0); b=max(a0,b0); }
22
+ };
23
+ bool operator<(const PAIR &x, const PAIR &y) {
24
+ if (x.a==y.a) return x.b<y.b;
25
+ else return x.a<y.a;
26
+ }
27
+ bool operator==(const PAIR &x, const PAIR &y) {
28
+ return x.a==y.a && x.b==y.b;
29
+ }
30
+ struct hash_PAIR {
31
+ size_t operator()(const PAIR &x) const {
32
+ return (x.a<<8) ^ (x.b<<0);
33
+ }
34
+ };
35
+
36
+ struct TRIPLE {
37
+ int a, b, c;
38
+ TRIPLE(int a0, int b0, int c0) {
39
+ a=a0; b=b0; c=c0;
40
+ if (a>b) swap(a,b);
41
+ if (b>c) swap(b,c);
42
+ if (a>b) swap(a,b);
43
+ }
44
+ };
45
+ bool operator<(const TRIPLE &x, const TRIPLE &y) {
46
+ if (x.a==y.a) {
47
+ if (x.b==y.b) return x.c<y.c;
48
+ else return x.b<y.b;
49
+ } else return x.a<y.a;
50
+ }
51
+ bool operator==(const TRIPLE &x, const TRIPLE &y) {
52
+ return x.a==y.a && x.b==y.b && x.c==y.c;
53
+ }
54
+ struct hash_TRIPLE {
55
+ size_t operator()(const TRIPLE &x) const {
56
+ return (x.a<<16) ^ (x.b<<8) ^ (x.c<<0);
57
+ }
58
+ };
59
+
60
+ unordered_map<PAIR, int, hash_PAIR> common2;
61
+ unordered_map<TRIPLE, int, hash_TRIPLE> common3;
62
+ unordered_map<PAIR, int, hash_PAIR>::iterator common2_it;
63
+ unordered_map<TRIPLE, int, hash_TRIPLE>::iterator common3_it;
64
+
65
+ #define common3_get(x) (((common3_it=common3.find(x))!=common3.end())?(common3_it->second):0)
66
+ #define common2_get(x) (((common2_it=common2.find(x))!=common2.end())?(common2_it->second):0)
67
+
68
+ int n,m; // n = number of nodes, m = number of edges
69
+ int *deg; // degrees of individual nodes
70
+ PAIR *edges; // list of edges
71
+
72
+ int **adj; // adj[x] - adjacency list of node x
73
+ PII **inc; // inc[x] - incidence list of node x: (y, edge id)
74
+ bool adjacent_list(int x, int y) { return binary_search(adj[x],adj[x]+deg[x],y); }
75
+ int *adj_matrix; // compressed adjacency matrix
76
+ const int adj_chunk = 8*sizeof(int);
77
+ bool adjacent_matrix(int x, int y) { return adj_matrix[(x*n+y)/adj_chunk]&(1<<((x*n+y)%adj_chunk)); }
78
+ bool (*adjacent)(int,int);
79
+ int getEdgeId(int x, int y) { return inc[x][lower_bound(adj[x],adj[x]+deg[x],y)-adj[x]].second; }
80
+
81
+ int64 **orbit; // orbit[x][o] - how many times does node x participate in orbit o
82
+ int64 **eorbit; // eorbit[x][o] - how many times does node x participate in edge orbit o
83
+
84
+ /** count graphlets on max 4 nodes */
85
+ void count4() {
86
+ clock_t startTime, endTime;
87
+ startTime = clock();
88
+ clock_t startTime_all, endTime_all;
89
+ startTime_all = startTime;
90
+ int frac,frac_prev;
91
+
92
+ // precompute triangles that span over edges
93
+ printf("stage 1 - precomputing common nodes\n");
94
+ int *tri = (int*)calloc(m,sizeof(int));
95
+ frac_prev=-1;
96
+ for (int i=0;i<m;i++) {
97
+ frac = 100LL*i/m;
98
+ if (frac!=frac_prev) {
99
+ printf("%d%%\r",frac);
100
+ frac_prev=frac;
101
+ }
102
+ int x=edges[i].a, y=edges[i].b;
103
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
104
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
105
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
106
+ else { yi++; }
107
+ }
108
+ }
109
+ endTime = clock();
110
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
111
+ startTime = endTime;
112
+
113
+ // count full graphlets
114
+ printf("stage 2 - counting full graphlets\n");
115
+ int64 *C4 = (int64*)calloc(n,sizeof(int64));
116
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
117
+ frac_prev=-1;
118
+ for (int x=0;x<n;x++) {
119
+ frac = 100LL*x/n;
120
+ if (frac!=frac_prev) {
121
+ printf("%d%%\r",frac);
122
+ frac_prev=frac;
123
+ }
124
+ for (int nx=0;nx<deg[x];nx++) {
125
+ int y=adj[x][nx];
126
+ if (y >= x) break;
127
+ nn=0;
128
+ for (int ny=0;ny<deg[y];ny++) {
129
+ int z=adj[y][ny];
130
+ if (z >= y) break;
131
+ if (adjacent(x,z)==0) continue;
132
+ neigh[nn++]=z;
133
+ }
134
+ for (int i=0;i<nn;i++) {
135
+ int z = neigh[i];
136
+ for (int j=i+1;j<nn;j++) {
137
+ int zz = neigh[j];
138
+ if (adjacent(z,zz)) {
139
+ C4[x]++; C4[y]++; C4[z]++; C4[zz]++;
140
+ }
141
+ }
142
+ }
143
+ }
144
+ }
145
+ endTime = clock();
146
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
147
+ startTime = endTime;
148
+
149
+ // set up a system of equations relating orbits for every node
150
+ printf("stage 3 - building systems of equations\n");
151
+ int *common = (int*)calloc(n,sizeof(int));
152
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
153
+ frac_prev=-1;
154
+ for (int x=0;x<n;x++) {
155
+ frac = 100LL*x/n;
156
+ if (frac!=frac_prev) {
157
+ printf("%d%%\r",frac);
158
+ frac_prev=frac;
159
+ }
160
+
161
+ int64 f_12_14=0, f_10_13=0;
162
+ int64 f_13_14=0, f_11_13=0;
163
+ int64 f_7_11=0, f_5_8=0;
164
+ int64 f_6_9=0, f_9_12=0, f_4_8=0, f_8_12=0;
165
+ int64 f_14=C4[x];
166
+
167
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
168
+ nc=0;
169
+
170
+ orbit[x][0]=deg[x];
171
+ // x - middle node
172
+ for (int nx1=0;nx1<deg[x];nx1++) {
173
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
174
+ for (int ny=0;ny<deg[y];ny++) {
175
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
176
+ if (adjacent(x,z)) { // triangle
177
+ if (z<y) {
178
+ f_12_14 += tri[ez]-1;
179
+ f_10_13 += (deg[y]-1-tri[ez])+(deg[z]-1-tri[ez]);
180
+ }
181
+ } else {
182
+ if (common[z]==0) common_list[nc++]=z;
183
+ common[z]++;
184
+ }
185
+ }
186
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
187
+ int z=inc[x][nx2].first, ez=inc[x][nx2].second;
188
+ if (adjacent(y,z)) { // triangle
189
+ orbit[x][3]++;
190
+ f_13_14 += (tri[ey]-1)+(tri[ez]-1);
191
+ f_11_13 += (deg[x]-1-tri[ey])+(deg[x]-1-tri[ez]);
192
+ } else { // path
193
+ orbit[x][2]++;
194
+ f_7_11 += (deg[x]-1-tri[ey]-1)+(deg[x]-1-tri[ez]-1);
195
+ f_5_8 += (deg[y]-1-tri[ey])+(deg[z]-1-tri[ez]);
196
+ }
197
+ }
198
+ }
199
+ // x - side node
200
+ for (int nx1=0;nx1<deg[x];nx1++) {
201
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
202
+ for (int ny=0;ny<deg[y];ny++) {
203
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
204
+ if (x==z) continue;
205
+ if (!adjacent(x,z)) { // path
206
+ orbit[x][1]++;
207
+ f_6_9 += (deg[y]-1-tri[ey]-1);
208
+ f_9_12 += tri[ez];
209
+ f_4_8 += (deg[z]-1-tri[ez]);
210
+ f_8_12 += (common[z]-1);
211
+ }
212
+ }
213
+ }
214
+
215
+ // solve system of equations
216
+ orbit[x][14]=(f_14);
217
+ orbit[x][13]=(f_13_14-6*f_14)/2;
218
+ orbit[x][12]=(f_12_14-3*f_14);
219
+ orbit[x][11]=(f_11_13-f_13_14+6*f_14)/2;
220
+ orbit[x][10]=(f_10_13-f_13_14+6*f_14);
221
+ orbit[x][9]=(f_9_12-2*f_12_14+6*f_14)/2;
222
+ orbit[x][8]=(f_8_12-2*f_12_14+6*f_14)/2;
223
+ orbit[x][7]=(f_13_14+f_7_11-f_11_13-6*f_14)/6;
224
+ orbit[x][6]=(2*f_12_14+f_6_9-f_9_12-6*f_14)/2;
225
+ orbit[x][5]=(2*f_12_14+f_5_8-f_8_12-6*f_14);
226
+ orbit[x][4]=(2*f_12_14+f_4_8-f_8_12-6*f_14);
227
+ }
228
+
229
+ endTime = clock();
230
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
231
+
232
+ endTime_all = endTime;
233
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
234
+ }
235
+
236
+
237
+ /** count edge orbits of graphlets on max 4 nodes */
238
+ void ecount4() {
239
+ clock_t startTime, endTime;
240
+ startTime = clock();
241
+ clock_t startTime_all, endTime_all;
242
+ startTime_all = startTime;
243
+ int frac,frac_prev;
244
+
245
+ // precompute triangles that span over edges
246
+ printf("stage 1 - precomputing common nodes\n");
247
+ int *tri = (int*)calloc(m,sizeof(int));
248
+ frac_prev=-1;
249
+ for (int i=0;i<m;i++) {
250
+ frac = 100LL*i/m;
251
+ if (frac!=frac_prev) {
252
+ printf("%d%%\r",frac);
253
+ frac_prev=frac;
254
+ }
255
+ int x=edges[i].a, y=edges[i].b;
256
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
257
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
258
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
259
+ else { yi++; }
260
+ }
261
+ }
262
+ endTime = clock();
263
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
264
+ startTime = endTime;
265
+
266
+ // count full graphlets
267
+ printf("stage 2 - counting full graphlets\n");
268
+ int64 *C4 = (int64*)calloc(m,sizeof(int64));
269
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
270
+ memset(neighx,-1,n*sizeof(int));
271
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
272
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
273
+ frac_prev=-1;
274
+ for (int x=0;x<n;x++) {
275
+ frac = 100LL*x/n;
276
+ if (frac!=frac_prev) {
277
+ printf("%d%%\r",frac);
278
+ frac_prev=frac;
279
+ }
280
+
281
+ for (int nx=0;nx<deg[x];nx++) {
282
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
283
+ neighx[y]=xy;
284
+ }
285
+ for (int nx=0;nx<deg[x];nx++) {
286
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
287
+ if (y >= x) break;
288
+ nn=0;
289
+ for (int ny=0;ny<deg[y];ny++) {
290
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
291
+ if (z >= y) break;
292
+ if (neighx[z]==-1) continue;
293
+ int xz=neighx[z];
294
+ neigh[nn]=z;
295
+ neigh_edges[nn]={xz, yz};
296
+ nn++;
297
+ }
298
+ for (int i=0;i<nn;i++) {
299
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
300
+ for (int j=i+1;j<nn;j++) {
301
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
302
+ if (adjacent(z,w)) {
303
+ C4[xy]++;
304
+ C4[xz]++; C4[yz]++;
305
+ C4[xw]++; C4[yw]++;
306
+ // another iteration to count this last(smallest) edge instead of calling getEdgeId
307
+ //int zw=getEdgeId(z,w); C4[zw]++;
308
+ }
309
+ }
310
+ }
311
+ }
312
+ for (int nx=0;nx<deg[x];nx++) {
313
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
314
+ neighx[y]=-1;
315
+ }
316
+ }
317
+ endTime = clock();
318
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
319
+ startTime = endTime;
320
+
321
+ // count full graphlets for the smallest edge
322
+ for (int x=0;x<n;x++) {
323
+ frac = 100LL*x/n;
324
+ if (frac!=frac_prev) {
325
+ printf("%d%%\r",frac);
326
+ frac_prev=frac;
327
+ }
328
+ for (int nx=deg[x]-1;nx>=0;nx--) {
329
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
330
+ if (y <= x) break;
331
+ nn=0;
332
+ for (int ny=deg[y]-1;ny>=0;ny--) {
333
+ int z=adj[y][ny];
334
+ if (z <= y) break;
335
+ if (adjacent(x,z)==0) continue;
336
+ neigh[nn++]=z;
337
+ }
338
+ for (int i=0;i<nn;i++) {
339
+ int z = neigh[i];
340
+ for (int j=i+1;j<nn;j++) {
341
+ int zz = neigh[j];
342
+ if (adjacent(z,zz)) {
343
+ C4[xy]++;
344
+ }
345
+ }
346
+ }
347
+ }
348
+ }
349
+ endTime = clock();
350
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
351
+ startTime = endTime;
352
+
353
+ // set up a system of equations relating orbits for every node
354
+ printf("stage 3 - building systems of equations\n");
355
+ int *common = (int*)calloc(n,sizeof(int));
356
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
357
+ frac_prev=-1;
358
+
359
+ for (int x=0;x<n;x++) {
360
+ frac = 100LL*x/n;
361
+ if (frac!=frac_prev) {
362
+ printf("%d%%\r",frac);
363
+ frac_prev=frac;
364
+ }
365
+
366
+ // common nodes of x and some other node
367
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
368
+ nc=0;
369
+ for (int nx=0;nx<deg[x];nx++) {
370
+ int y=adj[x][nx];
371
+ for (int ny=0;ny<deg[y];ny++) {
372
+ int z=adj[y][ny];
373
+ if (z==x) continue;
374
+ if (common[z]==0) common_list[nc++]=z;
375
+ common[z]++;
376
+ }
377
+ }
378
+
379
+ for (int nx=0;nx<deg[x];nx++) {
380
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
381
+ int e=xy;
382
+ for (int n1=0;n1<deg[x];n1++) {
383
+ int z=inc[x][n1].first, xz=inc[x][n1].second;
384
+ if (z==y) continue;
385
+ if (adjacent(y,z)) { // triangle
386
+ if (x<y) {
387
+ eorbit[e][1]++;
388
+ eorbit[e][10] += tri[xy]-1;
389
+ eorbit[e][7] += deg[z]-2;
390
+ }
391
+ eorbit[e][9] += tri[xz]-1;
392
+ eorbit[e][8] += deg[x]-2;
393
+ }
394
+ }
395
+ for (int n1=0;n1<deg[y];n1++) {
396
+ int z=inc[y][n1].first, yz=inc[y][n1].second;
397
+ if (z==x) continue;
398
+ if (!adjacent(x,z)) { // path x-y-z
399
+ eorbit[e][0]++;
400
+ eorbit[e][6] += tri[yz];
401
+ eorbit[e][5] += common[z]-1;
402
+ eorbit[e][4] += deg[y]-2;
403
+ eorbit[e][3] += deg[x]-1;
404
+ eorbit[e][2] += deg[z]-1;
405
+ }
406
+ }
407
+ }
408
+ }
409
+ // solve system of equations
410
+ for (int e=0;e<m;e++) {
411
+ eorbit[e][11]=C4[e];
412
+ eorbit[e][10]=(eorbit[e][10]-2*eorbit[e][11])/2;
413
+ eorbit[e][9]=(eorbit[e][9]-4*eorbit[e][11]);
414
+ eorbit[e][8]=(eorbit[e][8]-eorbit[e][9]-4*eorbit[e][10]-4*eorbit[e][11]);
415
+ eorbit[e][7]=(eorbit[e][7]-eorbit[e][9]-2*eorbit[e][11]);
416
+ eorbit[e][6]=(eorbit[e][6]-eorbit[e][9])/2;
417
+ eorbit[e][5]=(eorbit[e][5]-eorbit[e][9])/2;
418
+ eorbit[e][4]=(eorbit[e][4]-2*eorbit[e][6]-eorbit[e][8]-eorbit[e][9])/2;
419
+ eorbit[e][3]=(eorbit[e][3]-2*eorbit[e][5]-eorbit[e][8]-eorbit[e][9])/2;
420
+ eorbit[e][2]=(eorbit[e][2]-2*eorbit[e][5]-2*eorbit[e][6]-eorbit[e][9]);
421
+ }
422
+
423
+ endTime = clock();
424
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
425
+
426
+ endTime_all = endTime;
427
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
428
+ }
429
+
430
+
431
+ /** count graphlets on max 5 nodes */
432
+ void count5() {
433
+ clock_t startTime, endTime;
434
+ startTime = clock();
435
+ clock_t startTime_all, endTime_all;
436
+ startTime_all = startTime;
437
+ int frac,frac_prev;
438
+
439
+ // precompute common nodes
440
+ printf("stage 1 - precomputing common nodes\n");
441
+ frac_prev=-1;
442
+ for (int x=0;x<n;x++) {
443
+ frac = 100LL*x/n;
444
+ if (frac!=frac_prev) {
445
+ printf("%d%%\r",frac);
446
+ frac_prev=frac;
447
+ }
448
+ for (int n1=0;n1<deg[x];n1++) {
449
+ int a=adj[x][n1];
450
+ for (int n2=n1+1;n2<deg[x];n2++) {
451
+ int b=adj[x][n2];
452
+ PAIR ab=PAIR(a,b);
453
+ common2[ab]++;
454
+ for (int n3=n2+1;n3<deg[x];n3++) {
455
+ int c=adj[x][n3];
456
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
457
+ if (st<2) continue;
458
+ TRIPLE abc=TRIPLE(a,b,c);
459
+ common3[abc]++;
460
+ }
461
+ }
462
+ }
463
+ }
464
+ // precompute triangles that span over edges
465
+ int *tri = (int*)calloc(m,sizeof(int));
466
+ for (int i=0;i<m;i++) {
467
+ int x=edges[i].a, y=edges[i].b;
468
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
469
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
470
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
471
+ else { yi++; }
472
+ }
473
+ }
474
+ endTime = clock();
475
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
476
+ startTime = endTime;
477
+
478
+ // count full graphlets
479
+ printf("stage 2 - counting full graphlets\n");
480
+ int64 *C5 = (int64*)calloc(n,sizeof(int64));
481
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
482
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
483
+ frac_prev=-1;
484
+ for (int x=0;x<n;x++) {
485
+ frac = 100LL*x/n;
486
+ if (frac!=frac_prev) {
487
+ printf("%d%%\r",frac);
488
+ frac_prev=frac;
489
+ }
490
+ for (int nx=0;nx<deg[x];nx++) {
491
+ int y=adj[x][nx];
492
+ if (y >= x) break;
493
+ nn=0;
494
+ for (int ny=0;ny<deg[y];ny++) {
495
+ int z=adj[y][ny];
496
+ if (z >= y) break;
497
+ if (adjacent(x,z)) {
498
+ neigh[nn++]=z;
499
+ }
500
+ }
501
+ for (int i=0;i<nn;i++) {
502
+ int z = neigh[i];
503
+ nn2=0;
504
+ for (int j=i+1;j<nn;j++) {
505
+ int zz = neigh[j];
506
+ if (adjacent(z,zz)) {
507
+ neigh2[nn2++]=zz;
508
+ }
509
+ }
510
+ for (int i2=0;i2<nn2;i2++) {
511
+ int zz = neigh2[i2];
512
+ for (int j2=i2+1;j2<nn2;j2++) {
513
+ int zzz = neigh2[j2];
514
+ if (adjacent(zz,zzz)) {
515
+ C5[x]++; C5[y]++; C5[z]++; C5[zz]++; C5[zzz]++;
516
+ }
517
+ }
518
+ }
519
+ }
520
+ }
521
+ }
522
+ endTime = clock();
523
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
524
+ startTime = endTime;
525
+
526
+ int *common_x = (int*)calloc(n,sizeof(int));
527
+ int *common_x_list = (int*)malloc(n*sizeof(int)), ncx=0;
528
+ int *common_a = (int*)calloc(n,sizeof(int));
529
+ int *common_a_list = (int*)malloc(n*sizeof(int)), nca=0;
530
+
531
+ // set up a system of equations relating orbit counts
532
+ printf("stage 3 - building systems of equations\n");
533
+ frac_prev=-1;
534
+ for (int x=0;x<n;x++) {
535
+ frac = 100LL*x/n;
536
+ if (frac!=frac_prev) {
537
+ printf("%d%%\r",frac);
538
+ frac_prev=frac;
539
+ }
540
+
541
+ for (int i=0;i<ncx;i++) common_x[common_x_list[i]]=0;
542
+ ncx=0;
543
+
544
+ // smaller graphlets
545
+ orbit[x][0] = deg[x];
546
+ for (int nx1=0;nx1<deg[x];nx1++) {
547
+ int a=adj[x][nx1];
548
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
549
+ int b=adj[x][nx2];
550
+ if (adjacent(a,b)) orbit[x][3]++;
551
+ else orbit[x][2]++;
552
+ }
553
+ for (int na=0;na<deg[a];na++) {
554
+ int b=adj[a][na];
555
+ if (b!=x && !adjacent(x,b)) {
556
+ orbit[x][1]++;
557
+ if (common_x[b]==0) common_x_list[ncx++]=b;
558
+ common_x[b]++;
559
+ }
560
+ }
561
+ }
562
+
563
+ int64 f_71=0, f_70=0, f_67=0, f_66=0, f_58=0, f_57=0; // 14
564
+ int64 f_69=0, f_68=0, f_64=0, f_61=0, f_60=0, f_55=0, f_48=0, f_42=0, f_41=0; // 13
565
+ int64 f_65=0, f_63=0, f_59=0, f_54=0, f_47=0, f_46=0, f_40=0; // 12
566
+ int64 f_62=0, f_53=0, f_51=0, f_50=0, f_49=0, f_38=0, f_37=0, f_36=0; // 8
567
+ int64 f_44=0, f_33=0, f_30=0, f_26=0; // 11
568
+ int64 f_52=0, f_43=0, f_32=0, f_29=0, f_25=0; // 10
569
+ int64 f_56=0, f_45=0, f_39=0, f_31=0, f_28=0, f_24=0; // 9
570
+ int64 f_35=0, f_34=0, f_27=0, f_18=0, f_16=0, f_15=0; // 4
571
+ int64 f_17=0; // 5
572
+ int64 f_22=0, f_20=0, f_19=0; // 6
573
+ int64 f_23=0, f_21=0; // 7
574
+
575
+ for (int nx1=0;nx1<deg[x];nx1++) {
576
+ int a=inc[x][nx1].first, xa=inc[x][nx1].second;
577
+
578
+ for (int i=0;i<nca;i++) common_a[common_a_list[i]]=0;
579
+ nca=0;
580
+ for (int na=0;na<deg[a];na++) {
581
+ int b=adj[a][na];
582
+ for (int nb=0;nb<deg[b];nb++) {
583
+ int c=adj[b][nb];
584
+ if (c==a || adjacent(a,c)) continue;
585
+ if (common_a[c]==0) common_a_list[nca++]=c;
586
+ common_a[c]++;
587
+ }
588
+ }
589
+
590
+ // x = orbit-14 (tetrahedron)
591
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
592
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
593
+ if (!adjacent(a,b)) continue;
594
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
595
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
596
+ if (!adjacent(a,c) || !adjacent(b,c)) continue;
597
+ orbit[x][14]++;
598
+ f_70 += common3_get(TRIPLE(a,b,c))-1;
599
+ f_71 += (tri[xa]>2 && tri[xb]>2)?(common3_get(TRIPLE(x,a,b))-1):0;
600
+ f_71 += (tri[xa]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,a,c))-1):0;
601
+ f_71 += (tri[xb]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,b,c))-1):0;
602
+ f_67 += tri[xa]-2+tri[xb]-2+tri[xc]-2;
603
+ f_66 += common2_get(PAIR(a,b))-2;
604
+ f_66 += common2_get(PAIR(a,c))-2;
605
+ f_66 += common2_get(PAIR(b,c))-2;
606
+ f_58 += deg[x]-3;
607
+ f_57 += deg[a]-3+deg[b]-3+deg[c]-3;
608
+ }
609
+ }
610
+
611
+ // x = orbit-13 (diamond)
612
+ for (int nx2=0;nx2<deg[x];nx2++) {
613
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
614
+ if (!adjacent(a,b)) continue;
615
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
616
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
617
+ if (!adjacent(a,c) || adjacent(b,c)) continue;
618
+ orbit[x][13]++;
619
+ f_69 += (tri[xb]>1 && tri[xc]>1)?(common3_get(TRIPLE(x,b,c))-1):0;
620
+ f_68 += common3_get(TRIPLE(a,b,c))-1;
621
+ f_64 += common2_get(PAIR(b,c))-2;
622
+ f_61 += tri[xb]-1+tri[xc]-1;
623
+ f_60 += common2_get(PAIR(a,b))-1;
624
+ f_60 += common2_get(PAIR(a,c))-1;
625
+ f_55 += tri[xa]-2;
626
+ f_48 += deg[b]-2+deg[c]-2;
627
+ f_42 += deg[x]-3;
628
+ f_41 += deg[a]-3;
629
+ }
630
+ }
631
+
632
+ // x = orbit-12 (diamond)
633
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
634
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
635
+ if (!adjacent(a,b)) continue;
636
+ for (int na=0;na<deg[a];na++) {
637
+ int c=inc[a][na].first, ac=inc[a][na].second;
638
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
639
+ orbit[x][12]++;
640
+ f_65 += (tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
641
+ f_63 += common_x[c]-2;
642
+ f_59 += tri[ac]-1+common2_get(PAIR(b,c))-1;
643
+ f_54 += common2_get(PAIR(a,b))-2;
644
+ f_47 += deg[x]-2;
645
+ f_46 += deg[c]-2;
646
+ f_40 += deg[a]-3+deg[b]-3;
647
+ }
648
+ }
649
+
650
+ // x = orbit-8 (cycle)
651
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
652
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
653
+ if (adjacent(a,b)) continue;
654
+ for (int na=0;na<deg[a];na++) {
655
+ int c=inc[a][na].first, ac=inc[a][na].second;
656
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
657
+ orbit[x][8]++;
658
+ f_62 += (tri[ac]>0)?common3_get(TRIPLE(a,b,c)):0;
659
+ f_53 += tri[xa]+tri[xb];
660
+ f_51 += tri[ac]+common2_get(PAIR(c,b));
661
+ f_50 += common_x[c]-2;
662
+ f_49 += common_a[b]-2;
663
+ f_38 += deg[x]-2;
664
+ f_37 += deg[a]-2+deg[b]-2;
665
+ f_36 += deg[c]-2;
666
+ }
667
+ }
668
+
669
+ // x = orbit-11 (paw)
670
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
671
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
672
+ if (!adjacent(a,b)) continue;
673
+ for (int nx3=0;nx3<deg[x];nx3++) {
674
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
675
+ if (c==a || c==b || adjacent(a,c) || adjacent(b,c)) continue;
676
+ orbit[x][11]++;
677
+ f_44 += tri[xc];
678
+ f_33 += deg[x]-3;
679
+ f_30 += deg[c]-1;
680
+ f_26 += deg[a]-2+deg[b]-2;
681
+ }
682
+ }
683
+
684
+ // x = orbit-10 (paw)
685
+ for (int nx2=0;nx2<deg[x];nx2++) {
686
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
687
+ if (!adjacent(a,b)) continue;
688
+ for (int nb=0;nb<deg[b];nb++) {
689
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
690
+ if (c==x || c==a || adjacent(a,c) || adjacent(x,c)) continue;
691
+ orbit[x][10]++;
692
+ f_52 += common_a[c]-1;
693
+ f_43 += tri[bc];
694
+ f_32 += deg[b]-3;
695
+ f_29 += deg[c]-1;
696
+ f_25 += deg[a]-2;
697
+ }
698
+ }
699
+
700
+ // x = orbit-9 (paw)
701
+ for (int na1=0;na1<deg[a];na1++) {
702
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
703
+ if (b==x || adjacent(x,b)) continue;
704
+ for (int na2=na1+1;na2<deg[a];na2++) {
705
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
706
+ if (c==x || !adjacent(b,c) || adjacent(x,c)) continue;
707
+ orbit[x][9]++;
708
+ f_56 += (tri[ab]>1 && tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
709
+ f_45 += common2_get(PAIR(b,c))-1;
710
+ f_39 += tri[ab]-1+tri[ac]-1;
711
+ f_31 += deg[a]-3;
712
+ f_28 += deg[x]-1;
713
+ f_24 += deg[b]-2+deg[c]-2;
714
+ }
715
+ }
716
+
717
+ // x = orbit-4 (path)
718
+ for (int na=0;na<deg[a];na++) {
719
+ int b=inc[a][na].first, ab=inc[a][na].second;
720
+ if (b==x || adjacent(x,b)) continue;
721
+ for (int nb=0;nb<deg[b];nb++) {
722
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
723
+ if (c==a || adjacent(a,c) || adjacent(x,c)) continue;
724
+ orbit[x][4]++;
725
+ f_35 += common_a[c]-1;
726
+ f_34 += common_x[c];
727
+ f_27 += tri[bc];
728
+ f_18 += deg[b]-2;
729
+ f_16 += deg[x]-1;
730
+ f_15 += deg[c]-1;
731
+ }
732
+ }
733
+
734
+ // x = orbit-5 (path)
735
+ for (int nx2=0;nx2<deg[x];nx2++) {
736
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
737
+ if (b==a || adjacent(a,b)) continue;
738
+ for (int nb=0;nb<deg[b];nb++) {
739
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
740
+ if (c==x || adjacent(a,c) || adjacent(x,c)) continue;
741
+ orbit[x][5]++;
742
+ f_17 += deg[a]-1;
743
+ }
744
+ }
745
+
746
+ // x = orbit-6 (claw)
747
+ for (int na1=0;na1<deg[a];na1++) {
748
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
749
+ if (b==x || adjacent(x,b)) continue;
750
+ for (int na2=na1+1;na2<deg[a];na2++) {
751
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
752
+ if (c==x || adjacent(x,c) || adjacent(b,c)) continue;
753
+ orbit[x][6]++;
754
+ f_22 += deg[a]-3;
755
+ f_20 += deg[x]-1;
756
+ f_19 += deg[b]-1+deg[c]-1;
757
+ }
758
+ }
759
+
760
+ // x = orbit-7 (claw)
761
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
762
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
763
+ if (adjacent(a,b)) continue;
764
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
765
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
766
+ if (adjacent(a,c) || adjacent(b,c)) continue;
767
+ orbit[x][7]++;
768
+ f_23 += deg[x]-3;
769
+ f_21 += deg[a]-1+deg[b]-1+deg[c]-1;
770
+ }
771
+ }
772
+ }
773
+
774
+ // solve equations
775
+ orbit[x][72] = C5[x];
776
+ orbit[x][71] = (f_71-12*orbit[x][72])/2;
777
+ orbit[x][70] = (f_70-4*orbit[x][72]);
778
+ orbit[x][69] = (f_69-2*orbit[x][71])/4;
779
+ orbit[x][68] = (f_68-2*orbit[x][71]);
780
+ orbit[x][67] = (f_67-12*orbit[x][72]-4*orbit[x][71]);
781
+ orbit[x][66] = (f_66-12*orbit[x][72]-2*orbit[x][71]-3*orbit[x][70]);
782
+ orbit[x][65] = (f_65-3*orbit[x][70])/2;
783
+ orbit[x][64] = (f_64-2*orbit[x][71]-4*orbit[x][69]-1*orbit[x][68]);
784
+ orbit[x][63] = (f_63-3*orbit[x][70]-2*orbit[x][68]);
785
+ orbit[x][62] = (f_62-1*orbit[x][68])/2;
786
+ orbit[x][61] = (f_61-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][67])/2;
787
+ orbit[x][60] = (f_60-4*orbit[x][71]-2*orbit[x][68]-2*orbit[x][67]);
788
+ orbit[x][59] = (f_59-6*orbit[x][70]-2*orbit[x][68]-4*orbit[x][65]);
789
+ orbit[x][58] = (f_58-4*orbit[x][72]-2*orbit[x][71]-1*orbit[x][67]);
790
+ orbit[x][57] = (f_57-12*orbit[x][72]-4*orbit[x][71]-3*orbit[x][70]-1*orbit[x][67]-2*orbit[x][66]);
791
+ orbit[x][56] = (f_56-2*orbit[x][65])/3;
792
+ orbit[x][55] = (f_55-2*orbit[x][71]-2*orbit[x][67])/3;
793
+ orbit[x][54] = (f_54-3*orbit[x][70]-1*orbit[x][66]-2*orbit[x][65])/2;
794
+ orbit[x][53] = (f_53-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]);
795
+ orbit[x][52] = (f_52-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59])/2;
796
+ orbit[x][51] = (f_51-2*orbit[x][68]-2*orbit[x][63]-4*orbit[x][62]);
797
+ orbit[x][50] = (f_50-1*orbit[x][68]-2*orbit[x][63])/3;
798
+ orbit[x][49] = (f_49-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][62])/2;
799
+ orbit[x][48] = (f_48-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][68]-2*orbit[x][67]-2*orbit[x][64]-2*orbit[x][61]-1*orbit[x][60]);
800
+ orbit[x][47] = (f_47-3*orbit[x][70]-2*orbit[x][68]-1*orbit[x][66]-1*orbit[x][63]-1*orbit[x][60]);
801
+ orbit[x][46] = (f_46-3*orbit[x][70]-2*orbit[x][68]-2*orbit[x][65]-1*orbit[x][63]-1*orbit[x][59]);
802
+ orbit[x][45] = (f_45-2*orbit[x][65]-2*orbit[x][62]-3*orbit[x][56]);
803
+ orbit[x][44] = (f_44-1*orbit[x][67]-2*orbit[x][61])/4;
804
+ orbit[x][43] = (f_43-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59])/2;
805
+ orbit[x][42] = (f_42-2*orbit[x][71]-4*orbit[x][69]-2*orbit[x][67]-2*orbit[x][61]-3*orbit[x][55]);
806
+ orbit[x][41] = (f_41-2*orbit[x][71]-1*orbit[x][68]-2*orbit[x][67]-1*orbit[x][60]-3*orbit[x][55]);
807
+ orbit[x][40] = (f_40-6*orbit[x][70]-2*orbit[x][68]-2*orbit[x][66]-4*orbit[x][65]-1*orbit[x][60]-1*orbit[x][59]-4*orbit[x][54]);
808
+ orbit[x][39] = (f_39-4*orbit[x][65]-1*orbit[x][59]-6*orbit[x][56])/2;
809
+ orbit[x][38] = (f_38-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][63]-1*orbit[x][53]-3*orbit[x][50]);
810
+ orbit[x][37] = (f_37-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]-4*orbit[x][62]-1*orbit[x][53]-1*orbit[x][51]-4*orbit[x][49]);
811
+ orbit[x][36] = (f_36-1*orbit[x][68]-2*orbit[x][63]-2*orbit[x][62]-1*orbit[x][51]-3*orbit[x][50]);
812
+ orbit[x][35] = (f_35-1*orbit[x][59]-2*orbit[x][52]-2*orbit[x][45])/2;
813
+ orbit[x][34] = (f_34-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51])/2;
814
+ orbit[x][33] = (f_33-1*orbit[x][67]-2*orbit[x][61]-3*orbit[x][58]-4*orbit[x][44]-2*orbit[x][42])/2;
815
+ orbit[x][32] = (f_32-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][43]-2*orbit[x][41]-1*orbit[x][40])/2;
816
+ orbit[x][31] = (f_31-2*orbit[x][65]-1*orbit[x][59]-3*orbit[x][56]-1*orbit[x][43]-2*orbit[x][39]);
817
+ orbit[x][30] = (f_30-1*orbit[x][67]-1*orbit[x][63]-2*orbit[x][61]-1*orbit[x][53]-4*orbit[x][44]);
818
+ orbit[x][29] = (f_29-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][60]-1*orbit[x][59]-1*orbit[x][53]-2*orbit[x][52]-2*orbit[x][43]);
819
+ orbit[x][28] = (f_28-2*orbit[x][65]-2*orbit[x][62]-1*orbit[x][59]-1*orbit[x][51]-1*orbit[x][43]);
820
+ orbit[x][27] = (f_27-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][45])/2;
821
+ orbit[x][26] = (f_26-2*orbit[x][67]-2*orbit[x][63]-2*orbit[x][61]-6*orbit[x][58]-1*orbit[x][53]-2*orbit[x][47]-2*orbit[x][42]);
822
+ orbit[x][25] = (f_25-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][52]-1*orbit[x][48]-1*orbit[x][40])/2;
823
+ orbit[x][24] = (f_24-4*orbit[x][65]-4*orbit[x][62]-1*orbit[x][59]-6*orbit[x][56]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][39]);
824
+ orbit[x][23] = (f_23-1*orbit[x][55]-1*orbit[x][42]-2*orbit[x][33])/4;
825
+ orbit[x][22] = (f_22-2*orbit[x][54]-1*orbit[x][40]-1*orbit[x][39]-1*orbit[x][32]-2*orbit[x][31])/3;
826
+ orbit[x][21] = (f_21-3*orbit[x][55]-3*orbit[x][50]-2*orbit[x][42]-2*orbit[x][38]-2*orbit[x][33]);
827
+ orbit[x][20] = (f_20-2*orbit[x][54]-2*orbit[x][49]-1*orbit[x][40]-1*orbit[x][37]-1*orbit[x][32]);
828
+ orbit[x][19] = (f_19-4*orbit[x][54]-4*orbit[x][49]-1*orbit[x][40]-2*orbit[x][39]-1*orbit[x][37]-2*orbit[x][35]-2*orbit[x][31]);
829
+ orbit[x][18] = (f_18-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][45]-2*orbit[x][36]-2*orbit[x][27]-1*orbit[x][24])/2;
830
+ orbit[x][17] = (f_17-1*orbit[x][60]-1*orbit[x][53]-1*orbit[x][51]-1*orbit[x][48]-1*orbit[x][37]-2*orbit[x][34]-2*orbit[x][30])/2;
831
+ orbit[x][16] = (f_16-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][36]-2*orbit[x][34]-1*orbit[x][29]);
832
+ orbit[x][15] = (f_15-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][35]-2*orbit[x][34]-2*orbit[x][27]);
833
+ }
834
+ endTime = clock();
835
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
836
+
837
+ endTime_all = endTime;
838
+ printf("total: %.2f sec\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
839
+ }
840
+
841
+
842
+ /** count edge orbits of graphlets on max 5 nodes */
843
+ void ecount5() {
844
+ clock_t startTime, endTime;
845
+ startTime = clock();
846
+ clock_t startTime_all, endTime_all;
847
+ startTime_all = startTime;
848
+ int frac,frac_prev;
849
+
850
+ // precompute common nodes
851
+ printf("stage 1 - precomputing common nodes\n");
852
+ frac_prev=-1;
853
+ for (int x=0;x<n;x++) {
854
+ frac = 100LL*x/n;
855
+ if (frac!=frac_prev) {
856
+ printf("%d%%\r",frac);
857
+ frac_prev=frac;
858
+ }
859
+ for (int n1=0;n1<deg[x];n1++) {
860
+ int a=adj[x][n1];
861
+ for (int n2=n1+1;n2<deg[x];n2++) {
862
+ int b=adj[x][n2];
863
+ PAIR ab=PAIR(a,b);
864
+ common2[ab]++;
865
+ for (int n3=n2+1;n3<deg[x];n3++) {
866
+ int c=adj[x][n3];
867
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
868
+ if (st<2) continue;
869
+ TRIPLE abc=TRIPLE(a,b,c);
870
+ common3[abc]++;
871
+ }
872
+ }
873
+ }
874
+ }
875
+ // precompute triangles that span over edges
876
+ int *tri = (int*)calloc(m,sizeof(int));
877
+ for (int i=0;i<m;i++) {
878
+ int x=edges[i].a, y=edges[i].b;
879
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
880
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
881
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
882
+ else { yi++; }
883
+ }
884
+ }
885
+ endTime = clock();
886
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
887
+ startTime = endTime;
888
+
889
+ // count full graphlets
890
+ printf("stage 2 - counting full graphlets\n");
891
+ int64 *C5 = (int64*)calloc(m,sizeof(int64));
892
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
893
+ memset(neighx,-1,n*sizeof(int));
894
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
895
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
896
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
897
+ TIII *neigh2_edges = (TIII*)malloc(n*sizeof(TIII));
898
+ frac_prev=-1;
899
+ for (int x=0;x<n;x++) {
900
+ frac = 100LL*x/n;
901
+ if (frac!=frac_prev) {
902
+ printf("%d%%\r",frac);
903
+ frac_prev=frac;
904
+ }
905
+
906
+ for (int nx=0;nx<deg[x];nx++) {
907
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
908
+ neighx[y]=xy;
909
+ }
910
+ for (int nx=0;nx<deg[x];nx++) {
911
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
912
+ if (y >= x) break;
913
+ nn=0;
914
+ for (int ny=0;ny<deg[y];ny++) {
915
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
916
+ if (z >= y) break;
917
+ if (neighx[z]==-1) continue;
918
+ int xz=neighx[z];
919
+ neigh[nn]=z;
920
+ neigh_edges[nn]={xz, yz};
921
+ nn++;
922
+ }
923
+ for (int i=0;i<nn;i++) {
924
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
925
+ nn2 = 0;
926
+ for (int j=i+1;j<nn;j++) {
927
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
928
+ if (adjacent(z,w)) {
929
+ neigh2[nn2]=w;
930
+ int zw=getEdgeId(z,w);
931
+ neigh2_edges[nn2]={xw,yw,zw};
932
+ nn2++;
933
+ }
934
+ }
935
+ for (int i2=0;i2<nn2;i2++) {
936
+ int z2 = neigh2[i2];
937
+ int z2x=neigh2_edges[i2].first, z2y=neigh2_edges[i2].second, z2z=neigh2_edges[i2].third;
938
+ for (int j2=i2+1;j2<nn2;j2++) {
939
+ int z3 = neigh2[j2];
940
+ int z3x=neigh2_edges[j2].first, z3y=neigh2_edges[j2].second, z3z=neigh2_edges[j2].third;
941
+ if (adjacent(z2,z3)) {
942
+ int zid=getEdgeId(z2,z3);
943
+ C5[xy]++; C5[xz]++; C5[yz]++;
944
+ C5[z2x]++; C5[z2y]++; C5[z2z]++;
945
+ C5[z3x]++; C5[z3y]++; C5[z3z]++;
946
+ C5[zid]++;
947
+ }
948
+ }
949
+ }
950
+ }
951
+ }
952
+ for (int nx=0;nx<deg[x];nx++) {
953
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
954
+ neighx[y]=-1;
955
+ }
956
+ }
957
+ endTime = clock();
958
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
959
+ startTime = endTime;
960
+
961
+ // set up a system of equations relating orbits for every node
962
+ printf("stage 3 - building systems of equations\n");
963
+ int *common_x = (int*)calloc(n,sizeof(int));
964
+ int *common_x_list = (int*)malloc(n*sizeof(int)), nc_x=0;
965
+ int *common_y = (int*)calloc(n,sizeof(int));
966
+ int *common_y_list = (int*)malloc(n*sizeof(int)), nc_y=0;
967
+ frac_prev=-1;
968
+
969
+ for (int x=0;x<n;x++) {
970
+ frac = 100LL*x/n;
971
+ if (frac!=frac_prev) {
972
+ printf("%d%%\r",frac);
973
+ frac_prev=frac;
974
+ }
975
+
976
+ // common nodes of x and some other node
977
+ for (int i=0;i<nc_x;i++) common_x[common_x_list[i]]=0;
978
+ nc_x=0;
979
+ for (int nx=0;nx<deg[x];nx++) {
980
+ int a=adj[x][nx];
981
+ for (int na=0;na<deg[a];na++) {
982
+ int z=adj[a][na];
983
+ if (z==x) continue;
984
+ if (common_x[z]==0) common_x_list[nc_x++]=z;
985
+ common_x[z]++;
986
+ }
987
+ }
988
+
989
+ for (int nx=0;nx<deg[x];nx++) {
990
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
991
+ int e=xy;
992
+ if (y>=x) break;
993
+
994
+ // common nodes of y and some other node
995
+ for (int i=0;i<nc_y;i++) common_y[common_y_list[i]]=0;
996
+ nc_y=0;
997
+ for (int ny=0;ny<deg[y];ny++) {
998
+ int a=adj[y][ny];
999
+ for (int na=0;na<deg[a];na++) {
1000
+ int z=adj[a][na];
1001
+ if (z==y) continue;
1002
+ if (common_y[z]==0) common_y_list[nc_y++]=z;
1003
+ common_y[z]++;
1004
+ }
1005
+ }
1006
+
1007
+ int64 f_66=0, f_65=0, f_62=0, f_61=0, f_60=0, f_51=0, f_50=0; // 11
1008
+ int64 f_64=0, f_58=0, f_55=0, f_48=0, f_41=0, f_35=0; // 10
1009
+ int64 f_63=0, f_59=0, f_57=0, f_54=0, f_53=0, f_52=0, f_47=0, f_40=0, f_39=0, f_34=0, f_33=0; // 9
1010
+ int64 f_45=0, f_36=0, f_26=0, f_23=0, f_19=0; // 7
1011
+ int64 f_49=0, f_38=0, f_37=0, f_32=0, f_25=0, f_22=0, f_18=0; // 6
1012
+ int64 f_56=0, f_46=0, f_44=0, f_43=0, f_42=0, f_31=0, f_30=0; // 5
1013
+ int64 f_27=0, f_17=0, f_15=0; // 4
1014
+ int64 f_20=0, f_16=0, f_13=0; // 3
1015
+ int64 f_29=0, f_28=0, f_24=0, f_21=0, f_14=0, f_12=0; // 2
1016
+
1017
+ // smaller (3-node) graphlets
1018
+ orbit[x][0] = deg[x];
1019
+ for (int nx1=0;nx1<deg[x];nx1++) {
1020
+ int z=adj[x][nx1];
1021
+ if (z==y) continue;
1022
+ if (adjacent(y,z)) eorbit[e][1]++;
1023
+ else eorbit[e][0]++;
1024
+ }
1025
+ for (int ny=0;ny<deg[y];ny++) {
1026
+ int z=adj[y][ny];
1027
+ if (z==x) continue;
1028
+ if (!adjacent(x,z)) eorbit[e][0]++;
1029
+ }
1030
+
1031
+ // edge-orbit 11 = (14,14)
1032
+ for (int nx1=0;nx1<deg[x];nx1++) {
1033
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1034
+ if (a==y || !adjacent(y,a)) continue;
1035
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1036
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1037
+ if (b==y || !adjacent(y,b) || !adjacent(a,b)) continue;
1038
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b), ab=getEdgeId(a,b);
1039
+ eorbit[e][11]++;
1040
+ f_66 += common3_get(TRIPLE(x,y,a))-1;
1041
+ f_66 += common3_get(TRIPLE(x,y,b))-1;
1042
+ f_65 += common3_get(TRIPLE(a,b,x))-1;
1043
+ f_65 += common3_get(TRIPLE(a,b,y))-1;
1044
+ f_62 += tri[xy]-2;
1045
+ f_61 += (tri[xa]-2)+(tri[xb]-2)+(tri[ya]-2)+(tri[yb]-2);
1046
+ f_60 += tri[ab]-2;
1047
+ f_51 += (deg[x]-3)+(deg[y]-3);
1048
+ f_50 += (deg[a]-3)+(deg[b]-3);
1049
+ }
1050
+ }
1051
+
1052
+ // edge-orbit 10 = (13,13)
1053
+ for (int nx1=0;nx1<deg[x];nx1++) {
1054
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1055
+ if (a==y || !adjacent(y,a)) continue;
1056
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1057
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1058
+ if (b==y || !adjacent(y,b) || adjacent(a,b)) continue;
1059
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b);
1060
+ eorbit[e][10]++;
1061
+ f_64 += common3_get(TRIPLE(a,b,x))-1;
1062
+ f_64 += common3_get(TRIPLE(a,b,y))-1;
1063
+ f_58 += common2_get(PAIR(a,b))-2;
1064
+ f_55 += (tri[xa]-1)+(tri[xb]-1)+(tri[ya]-1)+(tri[yb]-1);
1065
+ f_48 += tri[xy]-2;
1066
+ f_41 += (deg[a]-2)+(deg[b]-2);
1067
+ f_35 += (deg[x]-3)+(deg[y]-3);
1068
+ }
1069
+ }
1070
+
1071
+ // edge-orbit 9 = (12,13)
1072
+ for (int nx=0;nx<deg[x];nx++) {
1073
+ int a=adj[x][nx], xa=inc[x][nx].second;
1074
+ if (a==y) continue;
1075
+ for (int ny=0;ny<deg[y];ny++) {
1076
+ int b=adj[y][ny], yb=inc[y][ny].second;
1077
+ if (b==x || !adjacent(a,b)) continue;
1078
+ int adj_ya=adjacent(y,a), adj_xb=adjacent(x,b);
1079
+ if (adj_ya+adj_xb!=1) continue;
1080
+ int ab=getEdgeId(a,b);
1081
+ eorbit[e][9]++;
1082
+ if (adj_xb) {
1083
+ int xb=getEdgeId(x,b);
1084
+ f_63 += common3_get(TRIPLE(a,b,y))-1;
1085
+ f_59 += common3_get(TRIPLE(a,b,x));
1086
+ f_57 += common_y[a]-2;
1087
+ f_54 += tri[yb]-1;
1088
+ f_53 += tri[xa]-1;
1089
+ f_47 += tri[xb]-2;
1090
+ f_40 += deg[y]-2;
1091
+ f_39 += deg[a]-2;
1092
+ f_34 += deg[x]-3;
1093
+ f_33 += deg[b]-3;
1094
+ } else if (adj_ya) {
1095
+ int ya=getEdgeId(y,a);
1096
+ f_63 += common3_get(TRIPLE(a,b,x))-1;
1097
+ f_59 += common3_get(TRIPLE(a,b,y));
1098
+ f_57 += common_x[b]-2;
1099
+ f_54 += tri[xa]-1;
1100
+ f_53 += tri[yb]-1;
1101
+ f_47 += tri[ya]-2;
1102
+ f_40 += deg[x]-2;
1103
+ f_39 += deg[b]-2;
1104
+ f_34 += deg[y]-3;
1105
+ f_33 += deg[a]-3;
1106
+ }
1107
+ f_52 += tri[ab]-1;
1108
+ }
1109
+ }
1110
+
1111
+ // edge-orbit 8 = (10,11)
1112
+ for (int nx=0;nx<deg[x];nx++) {
1113
+ int a=adj[x][nx];
1114
+ if (a==y || !adjacent(y,a)) continue;
1115
+ for (int nx1=0;nx1<deg[x];nx1++) {
1116
+ int b=adj[x][nx1];
1117
+ if (b==y || b==a || adjacent(y,b) || adjacent(a,b)) continue;
1118
+ eorbit[e][8]++;
1119
+ }
1120
+ for (int ny1=0;ny1<deg[y];ny1++) {
1121
+ int b=adj[y][ny1];
1122
+ if (b==x || b==a || adjacent(x,b) || adjacent(a,b)) continue;
1123
+ eorbit[e][8]++;
1124
+ }
1125
+ }
1126
+
1127
+ // edge-orbit 7 = (10,10)
1128
+ for (int nx=0;nx<deg[x];nx++) {
1129
+ int a=adj[x][nx];
1130
+ if (a==y || !adjacent(y,a)) continue;
1131
+ for (int na=0;na<deg[a];na++) {
1132
+ int b=adj[a][na], ab=inc[a][na].second;
1133
+ if (b==x || b==y || adjacent(x,b) || adjacent(y,b)) continue;
1134
+ eorbit[e][7]++;
1135
+ f_45 += common_x[b]-1;
1136
+ f_45 += common_y[b]-1;
1137
+ f_36 += tri[ab];
1138
+ f_26 += deg[a]-3;
1139
+ f_23 += deg[b]-1;
1140
+ f_19 += (deg[x]-2)+(deg[y]-2);
1141
+ }
1142
+ }
1143
+
1144
+ // edge-orbit 6 = (9,11)
1145
+ for (int ny1=0;ny1<deg[y];ny1++) {
1146
+ int a=adj[y][ny1], ya=inc[y][ny1].second;
1147
+ if (a==x || adjacent(x,a)) continue;
1148
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1149
+ int b=adj[y][ny2], yb=inc[y][ny2].second;
1150
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1151
+ int ab=getEdgeId(a,b);
1152
+ eorbit[e][6]++;
1153
+ f_49 += common3_get(TRIPLE(y,a,b));
1154
+ f_38 += tri[ab]-1;
1155
+ f_37 += tri[xy];
1156
+ f_32 += (tri[ya]-1)+(tri[yb]-1);
1157
+ f_25 += deg[y]-3;
1158
+ f_22 += deg[x]-1;
1159
+ f_18 += (deg[a]-2)+(deg[b]-2);
1160
+ }
1161
+ }
1162
+ for (int nx1=0;nx1<deg[x];nx1++) {
1163
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1164
+ if (a==y || adjacent(y,a)) continue;
1165
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1166
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1167
+ if (b==y || adjacent(y,b) || !adjacent(a,b)) continue;
1168
+ int ab=getEdgeId(a,b);
1169
+ eorbit[e][6]++;
1170
+ f_49 += common3_get(TRIPLE(x,a,b));
1171
+ f_38 += tri[ab]-1;
1172
+ f_37 += tri[xy];
1173
+ f_32 += (tri[xa]-1)+(tri[xb]-1);
1174
+ f_25 += deg[x]-3;
1175
+ f_22 += deg[y]-1;
1176
+ f_18 += (deg[a]-2)+(deg[b]-2);
1177
+ }
1178
+ }
1179
+
1180
+ // edge-orbit 5 = (8,8)
1181
+ for (int nx=0;nx<deg[x];nx++) {
1182
+ int a=adj[x][nx], xa=inc[x][nx].second;
1183
+ if (a==y || adjacent(y,a)) continue;
1184
+ for (int ny=0;ny<deg[y];ny++) {
1185
+ int b=adj[y][ny], yb=inc[y][ny].second;
1186
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1187
+ int ab=getEdgeId(a,b);
1188
+ eorbit[e][5]++;
1189
+ f_56 += common3_get(TRIPLE(x,a,b));
1190
+ f_56 += common3_get(TRIPLE(y,a,b));
1191
+ f_46 += tri[xy];
1192
+ f_44 += tri[xa]+tri[yb];
1193
+ f_43 += tri[ab];
1194
+ f_42 += common_x[b]-2;
1195
+ f_42 += common_y[a]-2;
1196
+ f_31 += (deg[x]-2)+(deg[y]-2);
1197
+ f_30 += (deg[a]-2)+(deg[b]-2);
1198
+ }
1199
+ }
1200
+
1201
+ // edge-orbit 4 = (6,7)
1202
+ for (int ny1=0;ny1<deg[y];ny1++) {
1203
+ int a=adj[y][ny1];
1204
+ if (a==x || adjacent(x,a)) continue;
1205
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1206
+ int b=adj[y][ny2];
1207
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
1208
+ eorbit[e][4]++;
1209
+ f_27 += tri[xy];
1210
+ f_17 += deg[y]-3;
1211
+ f_15 += (deg[a]-1)+(deg[b]-1);
1212
+ }
1213
+ }
1214
+ for (int nx1=0;nx1<deg[x];nx1++) {
1215
+ int a=adj[x][nx1];
1216
+ if (a==y || adjacent(y,a)) continue;
1217
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1218
+ int b=adj[x][nx2];
1219
+ if (b==y || adjacent(y,b) || adjacent(a,b)) continue;
1220
+ eorbit[e][4]++;
1221
+ f_27 += tri[xy];
1222
+ f_17 += deg[x]-3;
1223
+ f_15 += (deg[a]-1)+(deg[b]-1);
1224
+ }
1225
+ }
1226
+
1227
+ // edge-orbit 3 = (5,5)
1228
+ for (int nx=0;nx<deg[x];nx++) {
1229
+ int a=adj[x][nx];
1230
+ if (a==y || adjacent(y,a)) continue;
1231
+ for (int ny=0;ny<deg[y];ny++) {
1232
+ int b=adj[y][ny];
1233
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
1234
+ eorbit[e][3]++;
1235
+ f_20 += tri[xy];
1236
+ f_16 += (deg[x]-2)+(deg[y]-2);
1237
+ f_13 += (deg[a]-1)+(deg[b]-1);
1238
+ }
1239
+ }
1240
+
1241
+ // edge-orbit 2 = (4,5)
1242
+ for (int ny=0;ny<deg[y];ny++) {
1243
+ int a=adj[y][ny];
1244
+ if (a==x || adjacent(x,a)) continue;
1245
+ for (int na=0;na<deg[a];na++) {
1246
+ int b=adj[a][na], ab=inc[a][na].second;
1247
+ if (b==y || adjacent(y,b) || adjacent(x,b)) continue;
1248
+ eorbit[e][2]++;
1249
+ f_29 += common_y[b]-1;
1250
+ f_28 += common_x[b];
1251
+ f_24 += tri[xy];
1252
+ f_21 += tri[ab];
1253
+ f_14 += deg[a]-2;
1254
+ f_12 += deg[b]-1;
1255
+ }
1256
+ }
1257
+ for (int nx=0;nx<deg[x];nx++) {
1258
+ int a=adj[x][nx];
1259
+ if (a==y || adjacent(y,a)) continue;
1260
+ for (int na=0;na<deg[a];na++) {
1261
+ int b=adj[a][na], ab=inc[a][na].second;
1262
+ if (b==x || adjacent(x,b) || adjacent(y,b)) continue;
1263
+ eorbit[e][2]++;
1264
+ f_29 += common_x[b]-1;
1265
+ f_28 += common_y[b];
1266
+ f_24 += tri[xy];
1267
+ f_21 += tri[ab];
1268
+ f_14 += deg[a]-2;
1269
+ f_12 += deg[b]-1;
1270
+ }
1271
+ }
1272
+
1273
+ // solve system of equations
1274
+ eorbit[e][67]=C5[e];
1275
+ eorbit[e][66]=(f_66-6*eorbit[e][67])/2;
1276
+ eorbit[e][65]=(f_65-6*eorbit[e][67]);
1277
+ eorbit[e][64]=(f_64-2*eorbit[e][66]);
1278
+ eorbit[e][63]=(f_63-2*eorbit[e][65])/2;
1279
+ eorbit[e][62]=(f_62-2*eorbit[e][66]-3*eorbit[e][67]);
1280
+ eorbit[e][61]=(f_61-2*eorbit[e][65]-4*eorbit[e][66]-12*eorbit[e][67]);
1281
+ eorbit[e][60]=(f_60-1*eorbit[e][65]-3*eorbit[e][67]);
1282
+ eorbit[e][59]=(f_59-2*eorbit[e][65])/2;
1283
+ eorbit[e][58]=(f_58-1*eorbit[e][64]-1*eorbit[e][66]);
1284
+ eorbit[e][57]=(f_57-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1285
+ eorbit[e][56]=(f_56-2*eorbit[e][63])/2;
1286
+ eorbit[e][55]=(f_55-4*eorbit[e][62]-2*eorbit[e][64]-4*eorbit[e][66]);
1287
+ eorbit[e][54]=(f_54-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65])/2;
1288
+ eorbit[e][53]=(f_53-2*eorbit[e][59]-2*eorbit[e][64]-2*eorbit[e][65]);
1289
+ eorbit[e][52]=(f_52-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][65]);
1290
+ eorbit[e][51]=(f_51-1*eorbit[e][61]-2*eorbit[e][62]-1*eorbit[e][65]-4*eorbit[e][66]-6*eorbit[e][67]);
1291
+ eorbit[e][50]=(f_50-2*eorbit[e][60]-1*eorbit[e][61]-2*eorbit[e][65]-2*eorbit[e][66]-6*eorbit[e][67]);
1292
+ eorbit[e][49]=(f_49-1*eorbit[e][59])/3;
1293
+ eorbit[e][48]=(f_48-2*eorbit[e][62]-1*eorbit[e][66])/3;
1294
+ eorbit[e][47]=(f_47-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][65])/2;
1295
+ eorbit[e][46]=(f_46-1*eorbit[e][57]-1*eorbit[e][63]);
1296
+ eorbit[e][45]=(f_45-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
1297
+ eorbit[e][44]=(f_44-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
1298
+ eorbit[e][43]=(f_43-2*eorbit[e][56]-1*eorbit[e][63]);
1299
+ eorbit[e][42]=(f_42-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63])/2;
1300
+ eorbit[e][41]=(f_41-1*eorbit[e][55]-2*eorbit[e][58]-2*eorbit[e][62]-2*eorbit[e][64]-2*eorbit[e][66]);
1301
+ eorbit[e][40]=(f_40-2*eorbit[e][54]-1*eorbit[e][55]-1*eorbit[e][57]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1302
+ eorbit[e][39]=(f_39-1*eorbit[e][52]-1*eorbit[e][53]-1*eorbit[e][57]-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1303
+ eorbit[e][38]=(f_38-3*eorbit[e][49]-1*eorbit[e][56]-1*eorbit[e][59]);
1304
+ eorbit[e][37]=(f_37-1*eorbit[e][53]-1*eorbit[e][59]);
1305
+ eorbit[e][36]=(f_36-1*eorbit[e][52]-2*eorbit[e][60])/2;
1306
+ eorbit[e][35]=(f_35-6*eorbit[e][48]-1*eorbit[e][55]-4*eorbit[e][62]-1*eorbit[e][64]-2*eorbit[e][66]);
1307
+ eorbit[e][34]=(f_34-2*eorbit[e][47]-1*eorbit[e][53]-1*eorbit[e][55]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][64]-2*eorbit[e][65]);
1308
+ eorbit[e][33]=(f_33-2*eorbit[e][47]-1*eorbit[e][52]-2*eorbit[e][54]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65]);
1309
+ eorbit[e][32]=(f_32-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][59])/2;
1310
+ eorbit[e][31]=(f_31-2*eorbit[e][42]-1*eorbit[e][44]-2*eorbit[e][46]-2*eorbit[e][56]-2*eorbit[e][57]-2*eorbit[e][63]);
1311
+ eorbit[e][30]=(f_30-2*eorbit[e][42]-2*eorbit[e][43]-1*eorbit[e][44]-4*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
1312
+ eorbit[e][29]=(f_29-2*eorbit[e][38]-1*eorbit[e][45]-1*eorbit[e][52])/2;
1313
+ eorbit[e][28]=(f_28-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52])/2;
1314
+ eorbit[e][27]=(f_27-1*eorbit[e][34]-1*eorbit[e][47]);
1315
+ eorbit[e][26]=(f_26-1*eorbit[e][33]-2*eorbit[e][36]-1*eorbit[e][50]-1*eorbit[e][52]-2*eorbit[e][60])/2;
1316
+ eorbit[e][25]=(f_25-2*eorbit[e][32]-1*eorbit[e][37]-3*eorbit[e][49]-1*eorbit[e][53]-1*eorbit[e][59]);
1317
+ eorbit[e][24]=(f_24-1*eorbit[e][39]-1*eorbit[e][45]-1*eorbit[e][52]);
1318
+ eorbit[e][23]=(f_23-2*eorbit[e][36]-1*eorbit[e][45]-1*eorbit[e][52]-2*eorbit[e][58]-2*eorbit[e][60]);
1319
+ eorbit[e][22]=(f_22-1*eorbit[e][37]-1*eorbit[e][44]-1*eorbit[e][53]-1*eorbit[e][56]-1*eorbit[e][59]);
1320
+ eorbit[e][21]=(f_21-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][52])/2;
1321
+ eorbit[e][20]=(f_20-1*eorbit[e][40]-1*eorbit[e][54]);
1322
+ eorbit[e][19]=(f_19-1*eorbit[e][33]-2*eorbit[e][41]-1*eorbit[e][45]-2*eorbit[e][50]-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
1323
+ eorbit[e][18]=(f_18-2*eorbit[e][32]-2*eorbit[e][38]-1*eorbit[e][44]-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][56]-2*eorbit[e][59]);
1324
+ eorbit[e][17]=(f_17-2*eorbit[e][25]-1*eorbit[e][27]-1*eorbit[e][32]-1*eorbit[e][34]-1*eorbit[e][47])/3;
1325
+ eorbit[e][16]=(f_16-2*eorbit[e][20]-2*eorbit[e][22]-1*eorbit[e][31]-2*eorbit[e][40]-1*eorbit[e][44]-2*eorbit[e][54])/2;
1326
+ eorbit[e][15]=(f_15-2*eorbit[e][25]-2*eorbit[e][29]-1*eorbit[e][31]-2*eorbit[e][32]-1*eorbit[e][34]-2*eorbit[e][42]-2*eorbit[e][47]);
1327
+ eorbit[e][14]=(f_14-1*eorbit[e][18]-2*eorbit[e][21]-1*eorbit[e][30]-2*eorbit[e][38]-1*eorbit[e][39]-2*eorbit[e][43]-1*eorbit[e][52])/2;
1328
+ eorbit[e][13]=(f_13-2*eorbit[e][22]-2*eorbit[e][28]-1*eorbit[e][31]-1*eorbit[e][40]-2*eorbit[e][44]-2*eorbit[e][54]);
1329
+ eorbit[e][12]=(f_12-2*eorbit[e][21]-2*eorbit[e][28]-2*eorbit[e][29]-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52]);
1330
+ }
1331
+ }
1332
+
1333
+ endTime = clock();
1334
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
1335
+
1336
+ endTime_all = endTime;
1337
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
1338
+ }
1339
+
1340
+ int writeResults(int g, const char* output_filename) {
+ fstream fout;
+ // open the output file before checking the stream state
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 1;
+ }
+ int no[] = {0,0,1,4,15,73};
+ for (int i=0;i<n;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0)
+ fout << " ";
+ fout << orbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ return 0;
+ }
+
1359
+ string writeResultsString(int g) {
1360
+ std::stringstream ss("", ios_base::app | ios_base::out);
1361
+ int no[] = {0,0,1,4,15,73};
1362
+ for (int i=0;i<n;i++) {
1363
+ for (int j=0;j<no[g];j++) {
1364
+ if (j!=0)
1365
+ ss << " ";
1366
+ ss << orbit[i][j];
1367
+ }
1368
+ ss << endl;
1369
+ }
1370
+ return ss.str();
1371
+ }
1372
+
1373
+ int writeEdgeResults(int g, const char* output_filename) {
+ fstream fout;
+ // open the output file before checking the stream state
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 1;
+ }
+ int no[] = {0,0,0,2,12,68};
+ for (int i=0;i<m;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << eorbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ return 0;
+ }
+
1390
+ string writeEdgeResultsString(int g) {
1391
+ std::stringstream ss("", ios_base::app | ios_base::out);
1392
+ int no[] = {0,0,0,2,12,68};
1393
+ for (int i=0;i<m;i++) {
1394
+ for (int j=0;j<no[g];j++) {
1395
+ if (j!=0) ss << " ";
1396
+ ss << eorbit[i][j];
1397
+ }
1398
+ ss << endl;
1399
+ }
1400
+ return ss.str();
1401
+ }
1402
+
1403
+ int motif_counts(const char* orbit_type, int graphlet_size,
1404
+ const char* input_filename, const char* output_filename, string &out_str) {
1405
+ fstream fin; // input and output files
1406
+ // open input, output files
1407
+ if (strcmp(orbit_type, "node")!=0 && strcmp(orbit_type, "edge")!=0) {
1408
+ cerr << "Incorrect orbit type '" << orbit_type << "'. Should be 'node' or 'edge'." << endl;
1409
+ return 0;
1410
+ }
1411
+ if (graphlet_size!=4 && graphlet_size!=5) {
1412
+ cerr << "Incorrect graphlet size " << graphlet_size << ". Should be 4 or 5." << endl;
1413
+ return 0;
1414
+ }
1415
+ fin.open(input_filename, fstream::in);
1416
+ if (fin.fail()) {
1417
+ cerr << "Failed to open file " << input_filename << endl;
1418
+ return 0;
1419
+ }
1420
+ // read input graph
1421
+ fin >> n >> m;
1422
+ int d_max=0;
1423
+ edges = (PAIR*)malloc(m*sizeof(PAIR));
1424
+ deg = (int*)calloc(n,sizeof(int));
1425
+ for (int i=0;i<m;i++) {
1426
+ int a,b;
1427
+ fin >> a >> b;
1428
+ if (!(0<=a && a<n) || !(0<=b && b<n)) {
1429
+ cerr << "Node ids should be between 0 and n-1." << endl;
1430
+ return 0;
1431
+ }
1432
+ if (a==b) {
1433
+ cerr << "Self loops (edge from x to x) are not allowed." << endl;
1434
+ return 0;
1435
+ }
1436
+ deg[a]++; deg[b]++;
1437
+ edges[i]=PAIR(a,b);
1438
+ }
1439
+ for (int i=0;i<n;i++) d_max=max(d_max,deg[i]);
1440
+ printf("nodes: %d\n",n);
1441
+ printf("edges: %d\n",m);
1442
+ printf("max degree: %d\n",d_max);
1443
+ fin.close();
1444
+ if ((int)(set<PAIR>(edges,edges+m).size())!=m) {
1445
+ cerr << "Input file contains duplicate undirected edges." << endl;
1446
+ return 0;
1447
+ }
1448
+ // set up adjacency matrix if it's smaller than 100MB
1449
+ if ((int64)n*n < 100LL*1024*1024*8) {
1450
+ adjacent = adjacent_matrix;
1451
+ adj_matrix = (int*)calloc((n*n)/adj_chunk+1,sizeof(int));
1452
+ for (int i=0;i<m;i++) {
1453
+ int a=edges[i].a, b=edges[i].b;
1454
+ adj_matrix[(a*n+b)/adj_chunk]|=(1<<((a*n+b)%adj_chunk));
1455
+ adj_matrix[(b*n+a)/adj_chunk]|=(1<<((b*n+a)%adj_chunk));
1456
+ }
1457
+ } else {
1458
+ adjacent = adjacent_list;
1459
+ }
1460
+ // set up adjacency, incidence lists
1461
+ adj = (int**)malloc(n*sizeof(int*));
1462
+ for (int i=0;i<n;i++) adj[i] = (int*)malloc(deg[i]*sizeof(int));
1463
+ inc = (PII**)malloc(n*sizeof(PII*));
1464
+ for (int i=0;i<n;i++) inc[i] = (PII*)malloc(deg[i]*sizeof(PII));
1465
+ int *d = (int*)calloc(n,sizeof(int));
1466
+ for (int i=0;i<m;i++) {
1467
+ int a=edges[i].a, b=edges[i].b;
1468
+ adj[a][d[a]]=b; adj[b][d[b]]=a;
1469
+ inc[a][d[a]]=PII(b,i); inc[b][d[b]]=PII(a,i);
1470
+ d[a]++; d[b]++;
1471
+ }
1472
+ for (int i=0;i<n;i++) {
1473
+ sort(adj[i],adj[i]+deg[i]);
1474
+ sort(inc[i],inc[i]+deg[i]);
1475
+ }
1476
+ // initialize orbit counts
1477
+ orbit = (int64**)malloc(n*sizeof(int64*));
1478
+ for (int i=0;i<n;i++) orbit[i] = (int64*)calloc(73,sizeof(int64));
1479
+ // initialize edge orbit counts
1480
+ eorbit = (int64**)malloc(m*sizeof(int64*));
1481
+ for (int i=0;i<m;i++) eorbit[i] = (int64*)calloc(68,sizeof(int64));
1482
+
1483
+ if (strcmp(orbit_type,"node") == 0) {
1484
+ printf("Counting NODE orbits of graphlets on %d nodes.\n\n",graphlet_size);
1485
+ if (graphlet_size==4) count4();
1486
+ if (graphlet_size==5) count5();
1487
+ if (strcmp(output_filename, "std") == 0) {
1488
+ cout << "orbit counts: \n" << writeResultsString(graphlet_size) << endl;
1489
+ } else {
1490
+ out_str = writeResults(graphlet_size, output_filename);
1491
+ }
1492
+ } else {
1493
+ printf("Counting EDGE orbits of graphlets on %d nodes.\n\n",graphlet_size);
1494
+ if (graphlet_size==4) ecount4();
1495
+ if (graphlet_size==5) ecount5();
1496
+ if (strcmp(output_filename, "std") == 0) {
1497
+ cout << "orbit counts: \n" << writeEdgeResultsString(graphlet_size) << endl;
1498
+ } else {
1499
+ out_str = writeEdgeResults(graphlet_size, output_filename);
1500
+ }
1501
+ }
1502
+
1503
+ return 1;
1504
+ }
1505
+
1506
+ int init(int argc, char *argv[]) {
1507
+ if (argc!=5) {
1508
+ cerr << "Incorrect number of arguments." << endl;
1509
+ cerr << "Usage: orca.exe [orbit type: node|edge] [graphlet size: 4/5] [graph - input file] [graphlets - output file]" << endl;
1510
+ return 0;
1511
+ }
1512
+ int graphlet_size;
1513
+ sscanf(argv[2],"%d", &graphlet_size);
1514
+ string out;
1515
+ motif_counts(argv[1], graphlet_size, argv[3], argv[4], out);
1516
+
1517
+ return 1;
1518
+ }
1519
+
1520
+
1521
+ int main(int argc, char *argv[]) {
1522
+
1523
+
1524
+ if (!init(argc, argv)) {
1525
+ // cerr << "Stopping!" << endl;
1526
+ return 1;
1527
+ }
1528
+
1529
+
1530
+ return 0;
1531
+ }
1532
+
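For context on how these writers are consumed downstream: the binary reads a header line "n m" followed by one undirected edge per line (node ids 0..n-1, as checked in motif_counts), and writeResults/writeEdgeResults emit one whitespace-separated row of orbit counts per node/edge. A minimal, hypothetical Python sketch of driving the compiled binary from the analysis code — assuming it is built to analysis/orca/orca; the helper name and temp-file handling here are illustrative, not part of this upload:

import subprocess
import tempfile
import numpy as np
import networkx as nx

def orca_node_orbits(graph, graphlet_size=4, orca_bin="analysis/orca/orca"):
    # Relabel nodes to 0..n-1, as required by the reader in motif_counts().
    graph = nx.convert_node_labels_to_integers(graph)
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write(f"{graph.number_of_nodes()} {graph.number_of_edges()}\n")
        for u, v in graph.edges():
            f.write(f"{u} {v}\n")
        in_path = f.name
    out_path = in_path + ".out"
    # usage mirrors init(): orca [node|edge] [4|5] [input file] [output file]
    subprocess.run([orca_bin, "node", str(graphlet_size), in_path, out_path], check=True)
    # writeResults() emits one row per node: 15 orbit counts for size 4, 73 for size 5
    return np.loadtxt(out_path, dtype=np.int64, ndmin=2)

The 15/73 columns correspond to the no[] table used by writeResults for graphlet sizes 4 and 5.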
analysis/orca/orca.h ADDED
@@ -0,0 +1,1488 @@
1
+ #include <cstdio>
2
+ #include <cstdlib>
3
+ #include <cstring>
4
+ #include <cassert>
5
+ #include <ctime>
6
+ #include <iostream>
7
+ #include <fstream>
8
+ #include <set>
9
+ #include <unordered_map>
10
+ #include <algorithm>
11
+
12
+ #include <Python.h>
13
+ using namespace std;
14
+
15
+
16
+ typedef long long int64;
17
+ typedef pair<int,int> PII;
18
+ typedef struct { int first, second, third; } TIII;
19
+
20
+ struct PAIR {
21
+ int a, b;
22
+ PAIR(int a0, int b0) { a=min(a0,b0); b=max(a0,b0); }
23
+ };
24
+ bool operator<(const PAIR &x, const PAIR &y) {
25
+ if (x.a==y.a) return x.b<y.b;
26
+ else return x.a<y.a;
27
+ }
28
+ bool operator==(const PAIR &x, const PAIR &y) {
29
+ return x.a==y.a && x.b==y.b;
30
+ }
31
+ struct hash_PAIR {
32
+ size_t operator()(const PAIR &x) const {
33
+ return (x.a<<8) ^ (x.b<<0);
34
+ }
35
+ };
36
+
37
+ struct TRIPLE {
38
+ int a, b, c;
39
+ TRIPLE(int a0, int b0, int c0) {
40
+ a=a0; b=b0; c=c0;
41
+ if (a>b) swap(a,b);
42
+ if (b>c) swap(b,c);
43
+ if (a>b) swap(a,b);
44
+ }
45
+ };
46
+ bool operator<(const TRIPLE &x, const TRIPLE &y) {
47
+ if (x.a==y.a) {
48
+ if (x.b==y.b) return x.c<y.c;
49
+ else return x.b<y.b;
50
+ } else return x.a<y.a;
51
+ }
52
+ bool operator==(const TRIPLE &x, const TRIPLE &y) {
53
+ return x.a==y.a && x.b==y.b && x.c==y.c;
54
+ }
55
+ struct hash_TRIPLE {
56
+ size_t operator()(const TRIPLE &x) const {
57
+ return (x.a<<16) ^ (x.b<<8) ^ (x.c<<0);
58
+ }
59
+ };
60
+
61
+ unordered_map<PAIR, int, hash_PAIR> common2;
62
+ unordered_map<TRIPLE, int, hash_TRIPLE> common3;
63
+ unordered_map<PAIR, int, hash_PAIR>::iterator common2_it;
64
+ unordered_map<TRIPLE, int, hash_TRIPLE>::iterator common3_it;
65
+
66
+ #define common3_get(x) (((common3_it=common3.find(x))!=common3.end())?(common3_it->second):0)
67
+ #define common2_get(x) (((common2_it=common2.find(x))!=common2.end())?(common2_it->second):0)
68
+
69
+ int n,m; // n = number of nodes, m = number of edges
70
+ int *deg; // degrees of individual nodes
71
+ PAIR *edges; // list of edges
72
+
73
+ int **adj; // adj[x] - adjacency list of node x
74
+ PII **inc; // inc[x] - incidence list of node x: (y, edge id)
75
+ bool adjacent_list(int x, int y) { return binary_search(adj[x],adj[x]+deg[x],y); }
76
+ int *adj_matrix; // compressed adjacency matrix
77
+ const int adj_chunk = 8*sizeof(int);
78
+ bool adjacent_matrix(int x, int y) { return adj_matrix[(x*n+y)/adj_chunk]&(1<<((x*n+y)%adj_chunk)); }
79
+ bool (*adjacent)(int,int);
80
+ int getEdgeId(int x, int y) { return inc[x][lower_bound(adj[x],adj[x]+deg[x],y)-adj[x]].second; }
81
+
82
+ int64 **orbit; // orbit[x][o] - how many times does node x participate in orbit o
83
+ int64 **eorbit; // eorbit[x][o] - how many times does edge x participate in edge orbit o
84
+
85
+ /** count graphlets on max 4 nodes */
86
+ void count4() {
87
+ clock_t startTime, endTime;
88
+ startTime = clock();
89
+ clock_t startTime_all, endTime_all;
90
+ startTime_all = startTime;
91
+ int frac,frac_prev;
92
+
93
+ // precompute triangles that span over edges
94
+ printf("stage 1 - precomputing common nodes\n");
95
+ int *tri = (int*)calloc(m,sizeof(int));
96
+ frac_prev=-1;
97
+ for (int i=0;i<m;i++) {
98
+ frac = 100LL*i/m;
99
+ if (frac!=frac_prev) {
100
+ printf("%d%%\r",frac);
101
+ frac_prev=frac;
102
+ }
103
+ int x=edges[i].a, y=edges[i].b;
104
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
105
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
106
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
107
+ else { yi++; }
108
+ }
109
+ }
110
+ endTime = clock();
111
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
112
+ startTime = endTime;
113
+
114
+ // count full graphlets
115
+ printf("stage 2 - counting full graphlets\n");
116
+ int64 *C4 = (int64*)calloc(n,sizeof(int64));
117
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
118
+ frac_prev=-1;
119
+ for (int x=0;x<n;x++) {
120
+ frac = 100LL*x/n;
121
+ if (frac!=frac_prev) {
122
+ printf("%d%%\r",frac);
123
+ frac_prev=frac;
124
+ }
125
+ for (int nx=0;nx<deg[x];nx++) {
126
+ int y=adj[x][nx];
127
+ if (y >= x) break;
128
+ nn=0;
129
+ for (int ny=0;ny<deg[y];ny++) {
130
+ int z=adj[y][ny];
131
+ if (z >= y) break;
132
+ if (adjacent(x,z)==0) continue;
133
+ neigh[nn++]=z;
134
+ }
135
+ for (int i=0;i<nn;i++) {
136
+ int z = neigh[i];
137
+ for (int j=i+1;j<nn;j++) {
138
+ int zz = neigh[j];
139
+ if (adjacent(z,zz)) {
140
+ C4[x]++; C4[y]++; C4[z]++; C4[zz]++;
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+ endTime = clock();
147
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
148
+ startTime = endTime;
149
+
150
+ // set up a system of equations relating orbits for every node
151
+ printf("stage 3 - building systems of equations\n");
152
+ int *common = (int*)calloc(n,sizeof(int));
153
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
154
+ frac_prev=-1;
155
+ for (int x=0;x<n;x++) {
156
+ frac = 100LL*x/n;
157
+ if (frac!=frac_prev) {
158
+ printf("%d%%\r",frac);
159
+ frac_prev=frac;
160
+ }
161
+
162
+ int64 f_12_14=0, f_10_13=0;
163
+ int64 f_13_14=0, f_11_13=0;
164
+ int64 f_7_11=0, f_5_8=0;
165
+ int64 f_6_9=0, f_9_12=0, f_4_8=0, f_8_12=0;
166
+ int64 f_14=C4[x];
167
+
168
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
169
+ nc=0;
170
+
171
+ orbit[x][0]=deg[x];
172
+ // x - middle node
173
+ for (int nx1=0;nx1<deg[x];nx1++) {
174
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
175
+ for (int ny=0;ny<deg[y];ny++) {
176
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
177
+ if (adjacent(x,z)) { // triangle
178
+ if (z<y) {
179
+ f_12_14 += tri[ez]-1;
180
+ f_10_13 += (deg[y]-1-tri[ez])+(deg[z]-1-tri[ez]);
181
+ }
182
+ } else {
183
+ if (common[z]==0) common_list[nc++]=z;
184
+ common[z]++;
185
+ }
186
+ }
187
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
188
+ int z=inc[x][nx2].first, ez=inc[x][nx2].second;
189
+ if (adjacent(y,z)) { // triangle
190
+ orbit[x][3]++;
191
+ f_13_14 += (tri[ey]-1)+(tri[ez]-1);
192
+ f_11_13 += (deg[x]-1-tri[ey])+(deg[x]-1-tri[ez]);
193
+ } else { // path
194
+ orbit[x][2]++;
195
+ f_7_11 += (deg[x]-1-tri[ey]-1)+(deg[x]-1-tri[ez]-1);
196
+ f_5_8 += (deg[y]-1-tri[ey])+(deg[z]-1-tri[ez]);
197
+ }
198
+ }
199
+ }
200
+ // x - side node
201
+ for (int nx1=0;nx1<deg[x];nx1++) {
202
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
203
+ for (int ny=0;ny<deg[y];ny++) {
204
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
205
+ if (x==z) continue;
206
+ if (!adjacent(x,z)) { // path
207
+ orbit[x][1]++;
208
+ f_6_9 += (deg[y]-1-tri[ey]-1);
209
+ f_9_12 += tri[ez];
210
+ f_4_8 += (deg[z]-1-tri[ez]);
211
+ f_8_12 += (common[z]-1);
212
+ }
213
+ }
214
+ }
215
+
216
+ // solve system of equations
217
+ orbit[x][14]=(f_14);
218
+ orbit[x][13]=(f_13_14-6*f_14)/2;
219
+ orbit[x][12]=(f_12_14-3*f_14);
220
+ orbit[x][11]=(f_11_13-f_13_14+6*f_14)/2;
221
+ orbit[x][10]=(f_10_13-f_13_14+6*f_14);
222
+ orbit[x][9]=(f_9_12-2*f_12_14+6*f_14)/2;
223
+ orbit[x][8]=(f_8_12-2*f_12_14+6*f_14)/2;
224
+ orbit[x][7]=(f_13_14+f_7_11-f_11_13-6*f_14)/6;
225
+ orbit[x][6]=(2*f_12_14+f_6_9-f_9_12-6*f_14)/2;
226
+ orbit[x][5]=(2*f_12_14+f_5_8-f_8_12-6*f_14);
227
+ orbit[x][4]=(2*f_12_14+f_4_8-f_8_12-6*f_14);
228
+ }
229
+
230
+ endTime = clock();
231
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
232
+
233
+ endTime_all = endTime;
234
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
235
+ }
236
+
237
+
238
+ /** count edge orbits of graphlets on max 4 nodes */
239
+ void ecount4() {
240
+ clock_t startTime, endTime;
241
+ startTime = clock();
242
+ clock_t startTime_all, endTime_all;
243
+ startTime_all = startTime;
244
+ int frac,frac_prev;
245
+
246
+ // precompute triangles that span over edges
247
+ printf("stage 1 - precomputing common nodes\n");
248
+ int *tri = (int*)calloc(m,sizeof(int));
249
+ frac_prev=-1;
250
+ for (int i=0;i<m;i++) {
251
+ frac = 100LL*i/m;
252
+ if (frac!=frac_prev) {
253
+ printf("%d%%\r",frac);
254
+ frac_prev=frac;
255
+ }
256
+ int x=edges[i].a, y=edges[i].b;
257
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
258
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
259
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
260
+ else { yi++; }
261
+ }
262
+ }
263
+ endTime = clock();
264
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
265
+ startTime = endTime;
266
+
267
+ // count full graphlets
268
+ printf("stage 2 - counting full graphlets\n");
269
+ int64 *C4 = (int64*)calloc(m,sizeof(int64));
270
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
271
+ memset(neighx,-1,n*sizeof(int));
272
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
273
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
274
+ frac_prev=-1;
275
+ for (int x=0;x<n;x++) {
276
+ frac = 100LL*x/n;
277
+ if (frac!=frac_prev) {
278
+ printf("%d%%\r",frac);
279
+ frac_prev=frac;
280
+ }
281
+
282
+ for (int nx=0;nx<deg[x];nx++) {
283
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
284
+ neighx[y]=xy;
285
+ }
286
+ for (int nx=0;nx<deg[x];nx++) {
287
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
288
+ if (y >= x) break;
289
+ nn=0;
290
+ for (int ny=0;ny<deg[y];ny++) {
291
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
292
+ if (z >= y) break;
293
+ if (neighx[z]==-1) continue;
294
+ int xz=neighx[z];
295
+ neigh[nn]=z;
296
+ neigh_edges[nn]={xz, yz};
297
+ nn++;
298
+ }
299
+ for (int i=0;i<nn;i++) {
300
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
301
+ for (int j=i+1;j<nn;j++) {
302
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
303
+ if (adjacent(z,w)) {
304
+ C4[xy]++;
305
+ C4[xz]++; C4[yz]++;
306
+ C4[xw]++; C4[yw]++;
307
+ // another iteration to count this last(smallest) edge instead of calling getEdgeId
308
+ //int zw=getEdgeId(z,w); C4[zw]++;
309
+ }
310
+ }
311
+ }
312
+ }
313
+ for (int nx=0;nx<deg[x];nx++) {
314
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
315
+ neighx[y]=-1;
316
+ }
317
+ }
318
+ endTime = clock();
319
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
320
+ startTime = endTime;
321
+
322
+ // count full graphlets for the smallest edge
323
+ for (int x=0;x<n;x++) {
324
+ frac = 100LL*x/n;
325
+ if (frac!=frac_prev) {
326
+ printf("%d%%\r",frac);
327
+ frac_prev=frac;
328
+ }
329
+ for (int nx=deg[x]-1;nx>=0;nx--) {
330
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
331
+ if (y <= x) break;
332
+ nn=0;
333
+ for (int ny=deg[y]-1;ny>=0;ny--) {
334
+ int z=adj[y][ny];
335
+ if (z <= y) break;
336
+ if (adjacent(x,z)==0) continue;
337
+ neigh[nn++]=z;
338
+ }
339
+ for (int i=0;i<nn;i++) {
340
+ int z = neigh[i];
341
+ for (int j=i+1;j<nn;j++) {
342
+ int zz = neigh[j];
343
+ if (adjacent(z,zz)) {
344
+ C4[xy]++;
345
+ }
346
+ }
347
+ }
348
+ }
349
+ }
350
+ endTime = clock();
351
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
352
+ startTime = endTime;
353
+
354
+ // set up a system of equations relating orbits for every node
355
+ printf("stage 3 - building systems of equations\n");
356
+ int *common = (int*)calloc(n,sizeof(int));
357
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
358
+ frac_prev=-1;
359
+
360
+ for (int x=0;x<n;x++) {
361
+ frac = 100LL*x/n;
362
+ if (frac!=frac_prev) {
363
+ printf("%d%%\r",frac);
364
+ frac_prev=frac;
365
+ }
366
+
367
+ // common nodes of x and some other node
368
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
369
+ nc=0;
370
+ for (int nx=0;nx<deg[x];nx++) {
371
+ int y=adj[x][nx];
372
+ for (int ny=0;ny<deg[y];ny++) {
373
+ int z=adj[y][ny];
374
+ if (z==x) continue;
375
+ if (common[z]==0) common_list[nc++]=z;
376
+ common[z]++;
377
+ }
378
+ }
379
+
380
+ for (int nx=0;nx<deg[x];nx++) {
381
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
382
+ int e=xy;
383
+ for (int n1=0;n1<deg[x];n1++) {
384
+ int z=inc[x][n1].first, xz=inc[x][n1].second;
385
+ if (z==y) continue;
386
+ if (adjacent(y,z)) { // triangle
387
+ if (x<y) {
388
+ eorbit[e][1]++;
389
+ eorbit[e][10] += tri[xy]-1;
390
+ eorbit[e][7] += deg[z]-2;
391
+ }
392
+ eorbit[e][9] += tri[xz]-1;
393
+ eorbit[e][8] += deg[x]-2;
394
+ }
395
+ }
396
+ for (int n1=0;n1<deg[y];n1++) {
397
+ int z=inc[y][n1].first, yz=inc[y][n1].second;
398
+ if (z==x) continue;
399
+ if (!adjacent(x,z)) { // path x-y-z
400
+ eorbit[e][0]++;
401
+ eorbit[e][6] += tri[yz];
402
+ eorbit[e][5] += common[z]-1;
403
+ eorbit[e][4] += deg[y]-2;
404
+ eorbit[e][3] += deg[x]-1;
405
+ eorbit[e][2] += deg[z]-1;
406
+ }
407
+ }
408
+ }
409
+ }
410
+ // solve system of equations
411
+ for (int e=0;e<m;e++) {
412
+ eorbit[e][11]=C4[e];
413
+ eorbit[e][10]=(eorbit[e][10]-2*eorbit[e][11])/2;
414
+ eorbit[e][9]=(eorbit[e][9]-4*eorbit[e][11]);
415
+ eorbit[e][8]=(eorbit[e][8]-eorbit[e][9]-4*eorbit[e][10]-4*eorbit[e][11]);
416
+ eorbit[e][7]=(eorbit[e][7]-eorbit[e][9]-2*eorbit[e][11]);
417
+ eorbit[e][6]=(eorbit[e][6]-eorbit[e][9])/2;
418
+ eorbit[e][5]=(eorbit[e][5]-eorbit[e][9])/2;
419
+ eorbit[e][4]=(eorbit[e][4]-2*eorbit[e][6]-eorbit[e][8]-eorbit[e][9])/2;
420
+ eorbit[e][3]=(eorbit[e][3]-2*eorbit[e][5]-eorbit[e][8]-eorbit[e][9])/2;
421
+ eorbit[e][2]=(eorbit[e][2]-2*eorbit[e][5]-2*eorbit[e][6]-eorbit[e][9]);
422
+ }
423
+
424
+ endTime = clock();
425
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
426
+
427
+ endTime_all = endTime;
428
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
429
+ }
430
+
431
+
432
+ /** count graphlets on max 5 nodes */
433
+ void count5() {
434
+ clock_t startTime, endTime;
435
+ startTime = clock();
436
+ clock_t startTime_all, endTime_all;
437
+ startTime_all = startTime;
438
+ int frac,frac_prev;
439
+
440
+ // precompute common nodes
441
+ printf("stage 1 - precomputing common nodes\n");
442
+ frac_prev=-1;
443
+ for (int x=0;x<n;x++) {
444
+ frac = 100LL*x/n;
445
+ if (frac!=frac_prev) {
446
+ printf("%d%%\r",frac);
447
+ frac_prev=frac;
448
+ }
449
+ for (int n1=0;n1<deg[x];n1++) {
450
+ int a=adj[x][n1];
451
+ for (int n2=n1+1;n2<deg[x];n2++) {
452
+ int b=adj[x][n2];
453
+ PAIR ab=PAIR(a,b);
454
+ common2[ab]++;
455
+ for (int n3=n2+1;n3<deg[x];n3++) {
456
+ int c=adj[x][n3];
457
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
458
+ if (st<2) continue;
459
+ TRIPLE abc=TRIPLE(a,b,c);
460
+ common3[abc]++;
461
+ }
462
+ }
463
+ }
464
+ }
465
+ // precompute triangles that span over edges
466
+ int *tri = (int*)calloc(m,sizeof(int));
467
+ for (int i=0;i<m;i++) {
468
+ int x=edges[i].a, y=edges[i].b;
469
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
470
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
471
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
472
+ else { yi++; }
473
+ }
474
+ }
475
+ endTime = clock();
476
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
477
+ startTime = endTime;
478
+
479
+ // count full graphlets
480
+ printf("stage 2 - counting full graphlets\n");
481
+ int64 *C5 = (int64*)calloc(n,sizeof(int64));
482
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
483
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
484
+ frac_prev=-1;
485
+ for (int x=0;x<n;x++) {
486
+ frac = 100LL*x/n;
487
+ if (frac!=frac_prev) {
488
+ printf("%d%%\r",frac);
489
+ frac_prev=frac;
490
+ }
491
+ for (int nx=0;nx<deg[x];nx++) {
492
+ int y=adj[x][nx];
493
+ if (y >= x) break;
494
+ nn=0;
495
+ for (int ny=0;ny<deg[y];ny++) {
496
+ int z=adj[y][ny];
497
+ if (z >= y) break;
498
+ if (adjacent(x,z)) {
499
+ neigh[nn++]=z;
500
+ }
501
+ }
502
+ for (int i=0;i<nn;i++) {
503
+ int z = neigh[i];
504
+ nn2=0;
505
+ for (int j=i+1;j<nn;j++) {
506
+ int zz = neigh[j];
507
+ if (adjacent(z,zz)) {
508
+ neigh2[nn2++]=zz;
509
+ }
510
+ }
511
+ for (int i2=0;i2<nn2;i2++) {
512
+ int zz = neigh2[i2];
513
+ for (int j2=i2+1;j2<nn2;j2++) {
514
+ int zzz = neigh2[j2];
515
+ if (adjacent(zz,zzz)) {
516
+ C5[x]++; C5[y]++; C5[z]++; C5[zz]++; C5[zzz]++;
517
+ }
518
+ }
519
+ }
520
+ }
521
+ }
522
+ }
523
+ endTime = clock();
524
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
525
+ startTime = endTime;
526
+
527
+ int *common_x = (int*)calloc(n,sizeof(int));
528
+ int *common_x_list = (int*)malloc(n*sizeof(int)), ncx=0;
529
+ int *common_a = (int*)calloc(n,sizeof(int));
530
+ int *common_a_list = (int*)malloc(n*sizeof(int)), nca=0;
531
+
532
+ // set up a system of equations relating orbit counts
533
+ printf("stage 3 - building systems of equations\n");
534
+ frac_prev=-1;
535
+ for (int x=0;x<n;x++) {
536
+ frac = 100LL*x/n;
537
+ if (frac!=frac_prev) {
538
+ printf("%d%%\r",frac);
539
+ frac_prev=frac;
540
+ }
541
+
542
+ for (int i=0;i<ncx;i++) common_x[common_x_list[i]]=0;
543
+ ncx=0;
544
+
545
+ // smaller graphlets
546
+ orbit[x][0] = deg[x];
547
+ for (int nx1=0;nx1<deg[x];nx1++) {
548
+ int a=adj[x][nx1];
549
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
550
+ int b=adj[x][nx2];
551
+ if (adjacent(a,b)) orbit[x][3]++;
552
+ else orbit[x][2]++;
553
+ }
554
+ for (int na=0;na<deg[a];na++) {
555
+ int b=adj[a][na];
556
+ if (b!=x && !adjacent(x,b)) {
557
+ orbit[x][1]++;
558
+ if (common_x[b]==0) common_x_list[ncx++]=b;
559
+ common_x[b]++;
560
+ }
561
+ }
562
+ }
563
+
564
+ int64 f_71=0, f_70=0, f_67=0, f_66=0, f_58=0, f_57=0; // 14
565
+ int64 f_69=0, f_68=0, f_64=0, f_61=0, f_60=0, f_55=0, f_48=0, f_42=0, f_41=0; // 13
566
+ int64 f_65=0, f_63=0, f_59=0, f_54=0, f_47=0, f_46=0, f_40=0; // 12
567
+ int64 f_62=0, f_53=0, f_51=0, f_50=0, f_49=0, f_38=0, f_37=0, f_36=0; // 8
568
+ int64 f_44=0, f_33=0, f_30=0, f_26=0; // 11
569
+ int64 f_52=0, f_43=0, f_32=0, f_29=0, f_25=0; // 10
570
+ int64 f_56=0, f_45=0, f_39=0, f_31=0, f_28=0, f_24=0; // 9
571
+ int64 f_35=0, f_34=0, f_27=0, f_18=0, f_16=0, f_15=0; // 4
572
+ int64 f_17=0; // 5
573
+ int64 f_22=0, f_20=0, f_19=0; // 6
574
+ int64 f_23=0, f_21=0; // 7
575
+
576
+ for (int nx1=0;nx1<deg[x];nx1++) {
577
+ int a=inc[x][nx1].first, xa=inc[x][nx1].second;
578
+
579
+ for (int i=0;i<nca;i++) common_a[common_a_list[i]]=0;
580
+ nca=0;
581
+ for (int na=0;na<deg[a];na++) {
582
+ int b=adj[a][na];
583
+ for (int nb=0;nb<deg[b];nb++) {
584
+ int c=adj[b][nb];
585
+ if (c==a || adjacent(a,c)) continue;
586
+ if (common_a[c]==0) common_a_list[nca++]=c;
587
+ common_a[c]++;
588
+ }
589
+ }
590
+
591
+ // x = orbit-14 (tetrahedron)
592
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
593
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
594
+ if (!adjacent(a,b)) continue;
595
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
596
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
597
+ if (!adjacent(a,c) || !adjacent(b,c)) continue;
598
+ orbit[x][14]++;
599
+ f_70 += common3_get(TRIPLE(a,b,c))-1;
600
+ f_71 += (tri[xa]>2 && tri[xb]>2)?(common3_get(TRIPLE(x,a,b))-1):0;
601
+ f_71 += (tri[xa]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,a,c))-1):0;
602
+ f_71 += (tri[xb]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,b,c))-1):0;
603
+ f_67 += tri[xa]-2+tri[xb]-2+tri[xc]-2;
604
+ f_66 += common2_get(PAIR(a,b))-2;
605
+ f_66 += common2_get(PAIR(a,c))-2;
606
+ f_66 += common2_get(PAIR(b,c))-2;
607
+ f_58 += deg[x]-3;
608
+ f_57 += deg[a]-3+deg[b]-3+deg[c]-3;
609
+ }
610
+ }
611
+
612
+ // x = orbit-13 (diamond)
613
+ for (int nx2=0;nx2<deg[x];nx2++) {
614
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
615
+ if (!adjacent(a,b)) continue;
616
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
617
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
618
+ if (!adjacent(a,c) || adjacent(b,c)) continue;
619
+ orbit[x][13]++;
620
+ f_69 += (tri[xb]>1 && tri[xc]>1)?(common3_get(TRIPLE(x,b,c))-1):0;
621
+ f_68 += common3_get(TRIPLE(a,b,c))-1;
622
+ f_64 += common2_get(PAIR(b,c))-2;
623
+ f_61 += tri[xb]-1+tri[xc]-1;
624
+ f_60 += common2_get(PAIR(a,b))-1;
625
+ f_60 += common2_get(PAIR(a,c))-1;
626
+ f_55 += tri[xa]-2;
627
+ f_48 += deg[b]-2+deg[c]-2;
628
+ f_42 += deg[x]-3;
629
+ f_41 += deg[a]-3;
630
+ }
631
+ }
632
+
633
+ // x = orbit-12 (diamond)
634
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
635
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
636
+ if (!adjacent(a,b)) continue;
637
+ for (int na=0;na<deg[a];na++) {
638
+ int c=inc[a][na].first, ac=inc[a][na].second;
639
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
640
+ orbit[x][12]++;
641
+ f_65 += (tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
642
+ f_63 += common_x[c]-2;
643
+ f_59 += tri[ac]-1+common2_get(PAIR(b,c))-1;
644
+ f_54 += common2_get(PAIR(a,b))-2;
645
+ f_47 += deg[x]-2;
646
+ f_46 += deg[c]-2;
647
+ f_40 += deg[a]-3+deg[b]-3;
648
+ }
649
+ }
650
+
651
+ // x = orbit-8 (cycle)
652
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
653
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
654
+ if (adjacent(a,b)) continue;
655
+ for (int na=0;na<deg[a];na++) {
656
+ int c=inc[a][na].first, ac=inc[a][na].second;
657
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
658
+ orbit[x][8]++;
659
+ f_62 += (tri[ac]>0)?common3_get(TRIPLE(a,b,c)):0;
660
+ f_53 += tri[xa]+tri[xb];
661
+ f_51 += tri[ac]+common2_get(PAIR(c,b));
662
+ f_50 += common_x[c]-2;
663
+ f_49 += common_a[b]-2;
664
+ f_38 += deg[x]-2;
665
+ f_37 += deg[a]-2+deg[b]-2;
666
+ f_36 += deg[c]-2;
667
+ }
668
+ }
669
+
670
+ // x = orbit-11 (paw)
671
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
672
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
673
+ if (!adjacent(a,b)) continue;
674
+ for (int nx3=0;nx3<deg[x];nx3++) {
675
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
676
+ if (c==a || c==b || adjacent(a,c) || adjacent(b,c)) continue;
677
+ orbit[x][11]++;
678
+ f_44 += tri[xc];
679
+ f_33 += deg[x]-3;
680
+ f_30 += deg[c]-1;
681
+ f_26 += deg[a]-2+deg[b]-2;
682
+ }
683
+ }
684
+
685
+ // x = orbit-10 (paw)
686
+ for (int nx2=0;nx2<deg[x];nx2++) {
687
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
688
+ if (!adjacent(a,b)) continue;
689
+ for (int nb=0;nb<deg[b];nb++) {
690
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
691
+ if (c==x || c==a || adjacent(a,c) || adjacent(x,c)) continue;
692
+ orbit[x][10]++;
693
+ f_52 += common_a[c]-1;
694
+ f_43 += tri[bc];
695
+ f_32 += deg[b]-3;
696
+ f_29 += deg[c]-1;
697
+ f_25 += deg[a]-2;
698
+ }
699
+ }
700
+
701
+ // x = orbit-9 (paw)
702
+ for (int na1=0;na1<deg[a];na1++) {
703
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
704
+ if (b==x || adjacent(x,b)) continue;
705
+ for (int na2=na1+1;na2<deg[a];na2++) {
706
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
707
+ if (c==x || !adjacent(b,c) || adjacent(x,c)) continue;
708
+ orbit[x][9]++;
709
+ f_56 += (tri[ab]>1 && tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
710
+ f_45 += common2_get(PAIR(b,c))-1;
711
+ f_39 += tri[ab]-1+tri[ac]-1;
712
+ f_31 += deg[a]-3;
713
+ f_28 += deg[x]-1;
714
+ f_24 += deg[b]-2+deg[c]-2;
715
+ }
716
+ }
717
+
718
+ // x = orbit-4 (path)
719
+ for (int na=0;na<deg[a];na++) {
720
+ int b=inc[a][na].first, ab=inc[a][na].second;
721
+ if (b==x || adjacent(x,b)) continue;
722
+ for (int nb=0;nb<deg[b];nb++) {
723
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
724
+ if (c==a || adjacent(a,c) || adjacent(x,c)) continue;
725
+ orbit[x][4]++;
726
+ f_35 += common_a[c]-1;
727
+ f_34 += common_x[c];
728
+ f_27 += tri[bc];
729
+ f_18 += deg[b]-2;
730
+ f_16 += deg[x]-1;
731
+ f_15 += deg[c]-1;
732
+ }
733
+ }
734
+
735
+ // x = orbit-5 (path)
736
+ for (int nx2=0;nx2<deg[x];nx2++) {
737
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
738
+ if (b==a || adjacent(a,b)) continue;
739
+ for (int nb=0;nb<deg[b];nb++) {
740
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
741
+ if (c==x || adjacent(a,c) || adjacent(x,c)) continue;
742
+ orbit[x][5]++;
743
+ f_17 += deg[a]-1;
744
+ }
745
+ }
746
+
747
+ // x = orbit-6 (claw)
748
+ for (int na1=0;na1<deg[a];na1++) {
749
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
750
+ if (b==x || adjacent(x,b)) continue;
751
+ for (int na2=na1+1;na2<deg[a];na2++) {
752
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
753
+ if (c==x || adjacent(x,c) || adjacent(b,c)) continue;
754
+ orbit[x][6]++;
755
+ f_22 += deg[a]-3;
756
+ f_20 += deg[x]-1;
757
+ f_19 += deg[b]-1+deg[c]-1;
758
+ }
759
+ }
760
+
761
+ // x = orbit-7 (claw)
762
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
763
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
764
+ if (adjacent(a,b)) continue;
765
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
766
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
767
+ if (adjacent(a,c) || adjacent(b,c)) continue;
768
+ orbit[x][7]++;
769
+ f_23 += deg[x]-3;
770
+ f_21 += deg[a]-1+deg[b]-1+deg[c]-1;
771
+ }
772
+ }
773
+ }
774
+
775
+ // solve equations
776
+ orbit[x][72] = C5[x];
777
+ orbit[x][71] = (f_71-12*orbit[x][72])/2;
778
+ orbit[x][70] = (f_70-4*orbit[x][72]);
779
+ orbit[x][69] = (f_69-2*orbit[x][71])/4;
780
+ orbit[x][68] = (f_68-2*orbit[x][71]);
781
+ orbit[x][67] = (f_67-12*orbit[x][72]-4*orbit[x][71]);
782
+ orbit[x][66] = (f_66-12*orbit[x][72]-2*orbit[x][71]-3*orbit[x][70]);
783
+ orbit[x][65] = (f_65-3*orbit[x][70])/2;
784
+ orbit[x][64] = (f_64-2*orbit[x][71]-4*orbit[x][69]-1*orbit[x][68]);
785
+ orbit[x][63] = (f_63-3*orbit[x][70]-2*orbit[x][68]);
786
+ orbit[x][62] = (f_62-1*orbit[x][68])/2;
787
+ orbit[x][61] = (f_61-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][67])/2;
788
+ orbit[x][60] = (f_60-4*orbit[x][71]-2*orbit[x][68]-2*orbit[x][67]);
789
+ orbit[x][59] = (f_59-6*orbit[x][70]-2*orbit[x][68]-4*orbit[x][65]);
790
+ orbit[x][58] = (f_58-4*orbit[x][72]-2*orbit[x][71]-1*orbit[x][67]);
791
+ orbit[x][57] = (f_57-12*orbit[x][72]-4*orbit[x][71]-3*orbit[x][70]-1*orbit[x][67]-2*orbit[x][66]);
792
+ orbit[x][56] = (f_56-2*orbit[x][65])/3;
793
+ orbit[x][55] = (f_55-2*orbit[x][71]-2*orbit[x][67])/3;
794
+ orbit[x][54] = (f_54-3*orbit[x][70]-1*orbit[x][66]-2*orbit[x][65])/2;
795
+ orbit[x][53] = (f_53-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]);
796
+ orbit[x][52] = (f_52-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59])/2;
797
+ orbit[x][51] = (f_51-2*orbit[x][68]-2*orbit[x][63]-4*orbit[x][62]);
798
+ orbit[x][50] = (f_50-1*orbit[x][68]-2*orbit[x][63])/3;
799
+ orbit[x][49] = (f_49-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][62])/2;
800
+ orbit[x][48] = (f_48-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][68]-2*orbit[x][67]-2*orbit[x][64]-2*orbit[x][61]-1*orbit[x][60]);
801
+ orbit[x][47] = (f_47-3*orbit[x][70]-2*orbit[x][68]-1*orbit[x][66]-1*orbit[x][63]-1*orbit[x][60]);
802
+ orbit[x][46] = (f_46-3*orbit[x][70]-2*orbit[x][68]-2*orbit[x][65]-1*orbit[x][63]-1*orbit[x][59]);
803
+ orbit[x][45] = (f_45-2*orbit[x][65]-2*orbit[x][62]-3*orbit[x][56]);
804
+ orbit[x][44] = (f_44-1*orbit[x][67]-2*orbit[x][61])/4;
805
+ orbit[x][43] = (f_43-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59])/2;
806
+ orbit[x][42] = (f_42-2*orbit[x][71]-4*orbit[x][69]-2*orbit[x][67]-2*orbit[x][61]-3*orbit[x][55]);
807
+ orbit[x][41] = (f_41-2*orbit[x][71]-1*orbit[x][68]-2*orbit[x][67]-1*orbit[x][60]-3*orbit[x][55]);
808
+ orbit[x][40] = (f_40-6*orbit[x][70]-2*orbit[x][68]-2*orbit[x][66]-4*orbit[x][65]-1*orbit[x][60]-1*orbit[x][59]-4*orbit[x][54]);
809
+ orbit[x][39] = (f_39-4*orbit[x][65]-1*orbit[x][59]-6*orbit[x][56])/2;
810
+ orbit[x][38] = (f_38-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][63]-1*orbit[x][53]-3*orbit[x][50]);
811
+ orbit[x][37] = (f_37-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]-4*orbit[x][62]-1*orbit[x][53]-1*orbit[x][51]-4*orbit[x][49]);
812
+ orbit[x][36] = (f_36-1*orbit[x][68]-2*orbit[x][63]-2*orbit[x][62]-1*orbit[x][51]-3*orbit[x][50]);
813
+ orbit[x][35] = (f_35-1*orbit[x][59]-2*orbit[x][52]-2*orbit[x][45])/2;
814
+ orbit[x][34] = (f_34-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51])/2;
815
+ orbit[x][33] = (f_33-1*orbit[x][67]-2*orbit[x][61]-3*orbit[x][58]-4*orbit[x][44]-2*orbit[x][42])/2;
816
+ orbit[x][32] = (f_32-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][43]-2*orbit[x][41]-1*orbit[x][40])/2;
817
+ orbit[x][31] = (f_31-2*orbit[x][65]-1*orbit[x][59]-3*orbit[x][56]-1*orbit[x][43]-2*orbit[x][39]);
818
+ orbit[x][30] = (f_30-1*orbit[x][67]-1*orbit[x][63]-2*orbit[x][61]-1*orbit[x][53]-4*orbit[x][44]);
819
+ orbit[x][29] = (f_29-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][60]-1*orbit[x][59]-1*orbit[x][53]-2*orbit[x][52]-2*orbit[x][43]);
820
+ orbit[x][28] = (f_28-2*orbit[x][65]-2*orbit[x][62]-1*orbit[x][59]-1*orbit[x][51]-1*orbit[x][43]);
821
+ orbit[x][27] = (f_27-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][45])/2;
822
+ orbit[x][26] = (f_26-2*orbit[x][67]-2*orbit[x][63]-2*orbit[x][61]-6*orbit[x][58]-1*orbit[x][53]-2*orbit[x][47]-2*orbit[x][42]);
823
+ orbit[x][25] = (f_25-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][52]-1*orbit[x][48]-1*orbit[x][40])/2;
824
+ orbit[x][24] = (f_24-4*orbit[x][65]-4*orbit[x][62]-1*orbit[x][59]-6*orbit[x][56]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][39]);
825
+ orbit[x][23] = (f_23-1*orbit[x][55]-1*orbit[x][42]-2*orbit[x][33])/4;
826
+ orbit[x][22] = (f_22-2*orbit[x][54]-1*orbit[x][40]-1*orbit[x][39]-1*orbit[x][32]-2*orbit[x][31])/3;
827
+ orbit[x][21] = (f_21-3*orbit[x][55]-3*orbit[x][50]-2*orbit[x][42]-2*orbit[x][38]-2*orbit[x][33]);
828
+ orbit[x][20] = (f_20-2*orbit[x][54]-2*orbit[x][49]-1*orbit[x][40]-1*orbit[x][37]-1*orbit[x][32]);
829
+ orbit[x][19] = (f_19-4*orbit[x][54]-4*orbit[x][49]-1*orbit[x][40]-2*orbit[x][39]-1*orbit[x][37]-2*orbit[x][35]-2*orbit[x][31]);
830
+ orbit[x][18] = (f_18-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][45]-2*orbit[x][36]-2*orbit[x][27]-1*orbit[x][24])/2;
831
+ orbit[x][17] = (f_17-1*orbit[x][60]-1*orbit[x][53]-1*orbit[x][51]-1*orbit[x][48]-1*orbit[x][37]-2*orbit[x][34]-2*orbit[x][30])/2;
832
+ orbit[x][16] = (f_16-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][36]-2*orbit[x][34]-1*orbit[x][29]);
833
+ orbit[x][15] = (f_15-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][35]-2*orbit[x][34]-2*orbit[x][27]);
834
+ }
835
+ endTime = clock();
836
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
837
+
838
+ endTime_all = endTime;
839
+ printf("total: %.2f sec\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
840
+ }
841
+
842
+
843
+ /** count edge orbits of graphlets on max 5 nodes */
844
+ void ecount5() {
845
+ clock_t startTime, endTime;
846
+ startTime = clock();
847
+ clock_t startTime_all, endTime_all;
848
+ startTime_all = startTime;
849
+ int frac,frac_prev;
850
+
851
+ // precompute common nodes
852
+ printf("stage 1 - precomputing common nodes\n");
853
+ frac_prev=-1;
854
+ for (int x=0;x<n;x++) {
855
+ frac = 100LL*x/n;
856
+ if (frac!=frac_prev) {
857
+ printf("%d%%\r",frac);
858
+ frac_prev=frac;
859
+ }
860
+ for (int n1=0;n1<deg[x];n1++) {
861
+ int a=adj[x][n1];
862
+ for (int n2=n1+1;n2<deg[x];n2++) {
863
+ int b=adj[x][n2];
864
+ PAIR ab=PAIR(a,b);
865
+ common2[ab]++;
866
+ for (int n3=n2+1;n3<deg[x];n3++) {
867
+ int c=adj[x][n3];
868
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
869
+ if (st<2) continue;
870
+ TRIPLE abc=TRIPLE(a,b,c);
871
+ common3[abc]++;
872
+ }
873
+ }
874
+ }
875
+ }
876
+ // precompute triangles that span over edges
877
+ int *tri = (int*)calloc(m,sizeof(int));
878
+ for (int i=0;i<m;i++) {
879
+ int x=edges[i].a, y=edges[i].b;
880
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
881
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
882
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
883
+ else { yi++; }
884
+ }
885
+ }
886
+ endTime = clock();
887
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
888
+ startTime = endTime;
889
+
890
+ // count full graphlets
891
+ printf("stage 2 - counting full graphlets\n");
892
+ int64 *C5 = (int64*)calloc(m,sizeof(int64));
893
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
894
+ memset(neighx,-1,n*sizeof(int));
895
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
896
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
897
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
898
+ TIII *neigh2_edges = (TIII*)malloc(n*sizeof(TIII));
899
+ frac_prev=-1;
900
+ for (int x=0;x<n;x++) {
901
+ frac = 100LL*x/n;
902
+ if (frac!=frac_prev) {
903
+ printf("%d%%\r",frac);
904
+ frac_prev=frac;
905
+ }
906
+
907
+ for (int nx=0;nx<deg[x];nx++) {
908
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
909
+ neighx[y]=xy;
910
+ }
911
+ for (int nx=0;nx<deg[x];nx++) {
912
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
913
+ if (y >= x) break;
914
+ nn=0;
915
+ for (int ny=0;ny<deg[y];ny++) {
916
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
917
+ if (z >= y) break;
918
+ if (neighx[z]==-1) continue;
919
+ int xz=neighx[z];
920
+ neigh[nn]=z;
921
+ neigh_edges[nn]={xz, yz};
922
+ nn++;
923
+ }
924
+ for (int i=0;i<nn;i++) {
925
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
926
+ nn2 = 0;
927
+ for (int j=i+1;j<nn;j++) {
928
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
929
+ if (adjacent(z,w)) {
930
+ neigh2[nn2]=w;
931
+ int zw=getEdgeId(z,w);
932
+ neigh2_edges[nn2]={xw,yw,zw};
933
+ nn2++;
934
+ }
935
+ }
936
+ for (int i2=0;i2<nn2;i2++) {
937
+ int z2 = neigh2[i2];
938
+ int z2x=neigh2_edges[i2].first, z2y=neigh2_edges[i2].second, z2z=neigh2_edges[i2].third;
939
+ for (int j2=i2+1;j2<nn2;j2++) {
940
+ int z3 = neigh2[j2];
941
+ int z3x=neigh2_edges[j2].first, z3y=neigh2_edges[j2].second, z3z=neigh2_edges[j2].third;
942
+ if (adjacent(z2,z3)) {
943
+ int zid=getEdgeId(z2,z3);
944
+ C5[xy]++; C5[xz]++; C5[yz]++;
945
+ C5[z2x]++; C5[z2y]++; C5[z2z]++;
946
+ C5[z3x]++; C5[z3y]++; C5[z3z]++;
947
+ C5[zid]++;
948
+ }
949
+ }
950
+ }
951
+ }
952
+ }
953
+ for (int nx=0;nx<deg[x];nx++) {
954
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
955
+ neighx[y]=-1;
956
+ }
957
+ }
958
+ endTime = clock();
959
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
960
+ startTime = endTime;
961
+
962
+ // set up a system of equations relating orbits for every node
963
+ printf("stage 3 - building systems of equations\n");
964
+ int *common_x = (int*)calloc(n,sizeof(int));
965
+ int *common_x_list = (int*)malloc(n*sizeof(int)), nc_x=0;
966
+ int *common_y = (int*)calloc(n,sizeof(int));
967
+ int *common_y_list = (int*)malloc(n*sizeof(int)), nc_y=0;
968
+ frac_prev=-1;
969
+
970
+ for (int x=0;x<n;x++) {
971
+ frac = 100LL*x/n;
972
+ if (frac!=frac_prev) {
973
+ printf("%d%%\r",frac);
974
+ frac_prev=frac;
975
+ }
976
+
977
+ // common nodes of x and some other node
978
+ for (int i=0;i<nc_x;i++) common_x[common_x_list[i]]=0;
979
+ nc_x=0;
980
+ for (int nx=0;nx<deg[x];nx++) {
981
+ int a=adj[x][nx];
982
+ for (int na=0;na<deg[a];na++) {
983
+ int z=adj[a][na];
984
+ if (z==x) continue;
985
+ if (common_x[z]==0) common_x_list[nc_x++]=z;
986
+ common_x[z]++;
987
+ }
988
+ }
989
+
990
+ for (int nx=0;nx<deg[x];nx++) {
991
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
992
+ int e=xy;
993
+ if (y>=x) break;
994
+
995
+ // common nodes of y and some other node
996
+ for (int i=0;i<nc_y;i++) common_y[common_y_list[i]]=0;
997
+ nc_y=0;
998
+ for (int ny=0;ny<deg[y];ny++) {
999
+ int a=adj[y][ny];
1000
+ for (int na=0;na<deg[a];na++) {
1001
+ int z=adj[a][na];
1002
+ if (z==y) continue;
1003
+ if (common_y[z]==0) common_y_list[nc_y++]=z;
1004
+ common_y[z]++;
1005
+ }
1006
+ }
1007
+
1008
+ int64 f_66=0, f_65=0, f_62=0, f_61=0, f_60=0, f_51=0, f_50=0; // 11
1009
+ int64 f_64=0, f_58=0, f_55=0, f_48=0, f_41=0, f_35=0; // 10
1010
+ int64 f_63=0, f_59=0, f_57=0, f_54=0, f_53=0, f_52=0, f_47=0, f_40=0, f_39=0, f_34=0, f_33=0; // 9
1011
+ int64 f_45=0, f_36=0, f_26=0, f_23=0, f_19=0; // 7
1012
+ int64 f_49=0, f_38=0, f_37=0, f_32=0, f_25=0, f_22=0, f_18=0; // 6
1013
+ int64 f_56=0, f_46=0, f_44=0, f_43=0, f_42=0, f_31=0, f_30=0; // 5
1014
+ int64 f_27=0, f_17=0, f_15=0; // 4
1015
+ int64 f_20=0, f_16=0, f_13=0; // 3
1016
+ int64 f_29=0, f_28=0, f_24=0, f_21=0, f_14=0, f_12=0; // 2
1017
+
1018
+ // smaller (3-node) graphlets
1019
+ orbit[x][0] = deg[x];
1020
+ for (int nx1=0;nx1<deg[x];nx1++) {
1021
+ int z=adj[x][nx1];
1022
+ if (z==y) continue;
1023
+ if (adjacent(y,z)) eorbit[e][1]++;
1024
+ else eorbit[e][0]++;
1025
+ }
1026
+ for (int ny=0;ny<deg[y];ny++) {
1027
+ int z=adj[y][ny];
1028
+ if (z==x) continue;
1029
+ if (!adjacent(x,z)) eorbit[e][0]++;
1030
+ }
1031
+
1032
+ // edge-orbit 11 = (14,14)
1033
+ for (int nx1=0;nx1<deg[x];nx1++) {
1034
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1035
+ if (a==y || !adjacent(y,a)) continue;
1036
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1037
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1038
+ if (b==y || !adjacent(y,b) || !adjacent(a,b)) continue;
1039
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b), ab=getEdgeId(a,b);
1040
+ eorbit[e][11]++;
1041
+ f_66 += common3_get(TRIPLE(x,y,a))-1;
1042
+ f_66 += common3_get(TRIPLE(x,y,b))-1;
1043
+ f_65 += common3_get(TRIPLE(a,b,x))-1;
1044
+ f_65 += common3_get(TRIPLE(a,b,y))-1;
1045
+ f_62 += tri[xy]-2;
1046
+ f_61 += (tri[xa]-2)+(tri[xb]-2)+(tri[ya]-2)+(tri[yb]-2);
1047
+ f_60 += tri[ab]-2;
1048
+ f_51 += (deg[x]-3)+(deg[y]-3);
1049
+ f_50 += (deg[a]-3)+(deg[b]-3);
1050
+ }
1051
+ }
1052
+
1053
+ // edge-orbit 10 = (13,13)
1054
+ for (int nx1=0;nx1<deg[x];nx1++) {
1055
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1056
+ if (a==y || !adjacent(y,a)) continue;
1057
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1058
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1059
+ if (b==y || !adjacent(y,b) || adjacent(a,b)) continue;
1060
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b);
1061
+ eorbit[e][10]++;
1062
+ f_64 += common3_get(TRIPLE(a,b,x))-1;
1063
+ f_64 += common3_get(TRIPLE(a,b,y))-1;
1064
+ f_58 += common2_get(PAIR(a,b))-2;
1065
+ f_55 += (tri[xa]-1)+(tri[xb]-1)+(tri[ya]-1)+(tri[yb]-1);
1066
+ f_48 += tri[xy]-2;
1067
+ f_41 += (deg[a]-2)+(deg[b]-2);
1068
+ f_35 += (deg[x]-3)+(deg[y]-3);
1069
+ }
1070
+ }
1071
+
1072
+ // edge-orbit 9 = (12,13)
1073
+ for (int nx=0;nx<deg[x];nx++) {
1074
+ int a=adj[x][nx], xa=inc[x][nx].second;
1075
+ if (a==y) continue;
1076
+ for (int ny=0;ny<deg[y];ny++) {
1077
+ int b=adj[y][ny], yb=inc[y][ny].second;
1078
+ if (b==x || !adjacent(a,b)) continue;
1079
+ int adj_ya=adjacent(y,a), adj_xb=adjacent(x,b);
1080
+ if (adj_ya+adj_xb!=1) continue;
1081
+ int ab=getEdgeId(a,b);
1082
+ eorbit[e][9]++;
1083
+ if (adj_xb) {
1084
+ int xb=getEdgeId(x,b);
1085
+ f_63 += common3_get(TRIPLE(a,b,y))-1;
1086
+ f_59 += common3_get(TRIPLE(a,b,x));
1087
+ f_57 += common_y[a]-2;
1088
+ f_54 += tri[yb]-1;
1089
+ f_53 += tri[xa]-1;
1090
+ f_47 += tri[xb]-2;
1091
+ f_40 += deg[y]-2;
1092
+ f_39 += deg[a]-2;
1093
+ f_34 += deg[x]-3;
1094
+ f_33 += deg[b]-3;
1095
+ } else if (adj_ya) {
1096
+ int ya=getEdgeId(y,a);
1097
+ f_63 += common3_get(TRIPLE(a,b,x))-1;
1098
+ f_59 += common3_get(TRIPLE(a,b,y));
1099
+ f_57 += common_x[b]-2;
1100
+ f_54 += tri[xa]-1;
1101
+ f_53 += tri[yb]-1;
1102
+ f_47 += tri[ya]-2;
1103
+ f_40 += deg[x]-2;
1104
+ f_39 += deg[b]-2;
1105
+ f_34 += deg[y]-3;
1106
+ f_33 += deg[a]-3;
1107
+ }
1108
+ f_52 += tri[ab]-1;
1109
+ }
1110
+ }
1111
+
1112
+ // edge-orbit 8 = (10,11)
1113
+ for (int nx=0;nx<deg[x];nx++) {
1114
+ int a=adj[x][nx];
1115
+ if (a==y || !adjacent(y,a)) continue;
1116
+ for (int nx1=0;nx1<deg[x];nx1++) {
1117
+ int b=adj[x][nx1];
1118
+ if (b==y || b==a || adjacent(y,b) || adjacent(a,b)) continue;
1119
+ eorbit[e][8]++;
1120
+ }
1121
+ for (int ny1=0;ny1<deg[y];ny1++) {
1122
+ int b=adj[y][ny1];
1123
+ if (b==x || b==a || adjacent(x,b) || adjacent(a,b)) continue;
1124
+ eorbit[e][8]++;
1125
+ }
1126
+ }
1127
+
1128
+ // edge-orbit 7 = (10,10)
1129
+ for (int nx=0;nx<deg[x];nx++) {
1130
+ int a=adj[x][nx];
1131
+ if (a==y || !adjacent(y,a)) continue;
1132
+ for (int na=0;na<deg[a];na++) {
1133
+ int b=adj[a][na], ab=inc[a][na].second;
1134
+ if (b==x || b==y || adjacent(x,b) || adjacent(y,b)) continue;
1135
+ eorbit[e][7]++;
1136
+ f_45 += common_x[b]-1;
1137
+ f_45 += common_y[b]-1;
1138
+ f_36 += tri[ab];
1139
+ f_26 += deg[a]-3;
1140
+ f_23 += deg[b]-1;
1141
+ f_19 += (deg[x]-2)+(deg[y]-2);
1142
+ }
1143
+ }
1144
+
1145
+ // edge-orbit 6 = (9,11)
1146
+ for (int ny1=0;ny1<deg[y];ny1++) {
1147
+ int a=adj[y][ny1], ya=inc[y][ny1].second;
1148
+ if (a==x || adjacent(x,a)) continue;
1149
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1150
+ int b=adj[y][ny2], yb=inc[y][ny2].second;
1151
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1152
+ int ab=getEdgeId(a,b);
1153
+ eorbit[e][6]++;
1154
+ f_49 += common3_get(TRIPLE(y,a,b));
1155
+ f_38 += tri[ab]-1;
1156
+ f_37 += tri[xy];
+ f_32 += (tri[ya]-1)+(tri[yb]-1);
+ f_25 += deg[y]-3;
+ f_22 += deg[x]-1;
+ f_18 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+ for (int nx1=0;nx1<deg[x];nx1++) {
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
+ if (a==y || adjacent(y,a)) continue;
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
+ if (b==y || adjacent(y,b) || !adjacent(a,b)) continue;
+ int ab=getEdgeId(a,b);
+ eorbit[e][6]++;
+ f_49 += common3_get(TRIPLE(x,a,b));
+ f_38 += tri[ab]-1;
+ f_37 += tri[xy];
+ f_32 += (tri[xa]-1)+(tri[xb]-1);
+ f_25 += deg[x]-3;
+ f_22 += deg[y]-1;
+ f_18 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+
+ // edge-orbit 5 = (8,8)
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx], xa=inc[x][nx].second;
+ if (a==y || adjacent(y,a)) continue;
+ for (int ny=0;ny<deg[y];ny++) {
+ int b=adj[y][ny], yb=inc[y][ny].second;
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
+ int ab=getEdgeId(a,b);
+ eorbit[e][5]++;
+ f_56 += common3_get(TRIPLE(x,a,b));
+ f_56 += common3_get(TRIPLE(y,a,b));
+ f_46 += tri[xy];
+ f_44 += tri[xa]+tri[yb];
+ f_43 += tri[ab];
+ f_42 += common_x[b]-2;
+ f_42 += common_y[a]-2;
+ f_31 += (deg[x]-2)+(deg[y]-2);
+ f_30 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+
+ // edge-orbit 4 = (6,7)
+ for (int ny1=0;ny1<deg[y];ny1++) {
+ int a=adj[y][ny1];
+ if (a==x || adjacent(x,a)) continue;
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
+ int b=adj[y][ny2];
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
+ eorbit[e][4]++;
+ f_27 += tri[xy];
+ f_17 += deg[y]-3;
+ f_15 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+ for (int nx1=0;nx1<deg[x];nx1++) {
+ int a=adj[x][nx1];
+ if (a==y || adjacent(y,a)) continue;
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
+ int b=adj[x][nx2];
+ if (b==y || adjacent(y,b) || adjacent(a,b)) continue;
+ eorbit[e][4]++;
+ f_27 += tri[xy];
+ f_17 += deg[x]-3;
+ f_15 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+
+ // edge-orbit 3 = (5,5)
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx];
+ if (a==y || adjacent(y,a)) continue;
+ for (int ny=0;ny<deg[y];ny++) {
+ int b=adj[y][ny];
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
+ eorbit[e][3]++;
+ f_20 += tri[xy];
+ f_16 += (deg[x]-2)+(deg[y]-2);
+ f_13 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+
+ // edge-orbit 2 = (4,5)
+ for (int ny=0;ny<deg[y];ny++) {
+ int a=adj[y][ny];
+ if (a==x || adjacent(x,a)) continue;
+ for (int na=0;na<deg[a];na++) {
+ int b=adj[a][na], ab=inc[a][na].second;
+ if (b==y || adjacent(y,b) || adjacent(x,b)) continue;
+ eorbit[e][2]++;
+ f_29 += common_y[b]-1;
+ f_28 += common_x[b];
+ f_24 += tri[xy];
+ f_21 += tri[ab];
+ f_14 += deg[a]-2;
+ f_12 += deg[b]-1;
+ }
+ }
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx];
+ if (a==y || adjacent(y,a)) continue;
+ for (int na=0;na<deg[a];na++) {
+ int b=adj[a][na], ab=inc[a][na].second;
+ if (b==x || adjacent(x,b) || adjacent(y,b)) continue;
+ eorbit[e][2]++;
+ f_29 += common_x[b]-1;
+ f_28 += common_y[b];
+ f_24 += tri[xy];
+ f_21 += tri[ab];
+ f_14 += deg[a]-2;
+ f_12 += deg[b]-1;
+ }
+ }
+
+ // solve system of equations
+ eorbit[e][67]=C5[e];
+ eorbit[e][66]=(f_66-6*eorbit[e][67])/2;
+ eorbit[e][65]=(f_65-6*eorbit[e][67]);
+ eorbit[e][64]=(f_64-2*eorbit[e][66]);
+ eorbit[e][63]=(f_63-2*eorbit[e][65])/2;
+ eorbit[e][62]=(f_62-2*eorbit[e][66]-3*eorbit[e][67]);
+ eorbit[e][61]=(f_61-2*eorbit[e][65]-4*eorbit[e][66]-12*eorbit[e][67]);
+ eorbit[e][60]=(f_60-1*eorbit[e][65]-3*eorbit[e][67]);
+ eorbit[e][59]=(f_59-2*eorbit[e][65])/2;
+ eorbit[e][58]=(f_58-1*eorbit[e][64]-1*eorbit[e][66]);
+ eorbit[e][57]=(f_57-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][56]=(f_56-2*eorbit[e][63])/2;
+ eorbit[e][55]=(f_55-4*eorbit[e][62]-2*eorbit[e][64]-4*eorbit[e][66]);
+ eorbit[e][54]=(f_54-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65])/2;
+ eorbit[e][53]=(f_53-2*eorbit[e][59]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][52]=(f_52-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][65]);
+ eorbit[e][51]=(f_51-1*eorbit[e][61]-2*eorbit[e][62]-1*eorbit[e][65]-4*eorbit[e][66]-6*eorbit[e][67]);
+ eorbit[e][50]=(f_50-2*eorbit[e][60]-1*eorbit[e][61]-2*eorbit[e][65]-2*eorbit[e][66]-6*eorbit[e][67]);
+ eorbit[e][49]=(f_49-1*eorbit[e][59])/3;
+ eorbit[e][48]=(f_48-2*eorbit[e][62]-1*eorbit[e][66])/3;
+ eorbit[e][47]=(f_47-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][65])/2;
+ eorbit[e][46]=(f_46-1*eorbit[e][57]-1*eorbit[e][63]);
+ eorbit[e][45]=(f_45-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
+ eorbit[e][44]=(f_44-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][43]=(f_43-2*eorbit[e][56]-1*eorbit[e][63]);
+ eorbit[e][42]=(f_42-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63])/2;
+ eorbit[e][41]=(f_41-1*eorbit[e][55]-2*eorbit[e][58]-2*eorbit[e][62]-2*eorbit[e][64]-2*eorbit[e][66]);
+ eorbit[e][40]=(f_40-2*eorbit[e][54]-1*eorbit[e][55]-1*eorbit[e][57]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][39]=(f_39-1*eorbit[e][52]-1*eorbit[e][53]-1*eorbit[e][57]-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][38]=(f_38-3*eorbit[e][49]-1*eorbit[e][56]-1*eorbit[e][59]);
+ eorbit[e][37]=(f_37-1*eorbit[e][53]-1*eorbit[e][59]);
+ eorbit[e][36]=(f_36-1*eorbit[e][52]-2*eorbit[e][60])/2;
+ eorbit[e][35]=(f_35-6*eorbit[e][48]-1*eorbit[e][55]-4*eorbit[e][62]-1*eorbit[e][64]-2*eorbit[e][66]);
+ eorbit[e][34]=(f_34-2*eorbit[e][47]-1*eorbit[e][53]-1*eorbit[e][55]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][33]=(f_33-2*eorbit[e][47]-1*eorbit[e][52]-2*eorbit[e][54]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65]);
+ eorbit[e][32]=(f_32-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][59])/2;
+ eorbit[e][31]=(f_31-2*eorbit[e][42]-1*eorbit[e][44]-2*eorbit[e][46]-2*eorbit[e][56]-2*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][30]=(f_30-2*eorbit[e][42]-2*eorbit[e][43]-1*eorbit[e][44]-4*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][29]=(f_29-2*eorbit[e][38]-1*eorbit[e][45]-1*eorbit[e][52])/2;
+ eorbit[e][28]=(f_28-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52])/2;
+ eorbit[e][27]=(f_27-1*eorbit[e][34]-1*eorbit[e][47]);
+ eorbit[e][26]=(f_26-1*eorbit[e][33]-2*eorbit[e][36]-1*eorbit[e][50]-1*eorbit[e][52]-2*eorbit[e][60])/2;
+ eorbit[e][25]=(f_25-2*eorbit[e][32]-1*eorbit[e][37]-3*eorbit[e][49]-1*eorbit[e][53]-1*eorbit[e][59]);
+ eorbit[e][24]=(f_24-1*eorbit[e][39]-1*eorbit[e][45]-1*eorbit[e][52]);
+ eorbit[e][23]=(f_23-2*eorbit[e][36]-1*eorbit[e][45]-1*eorbit[e][52]-2*eorbit[e][58]-2*eorbit[e][60]);
+ eorbit[e][22]=(f_22-1*eorbit[e][37]-1*eorbit[e][44]-1*eorbit[e][53]-1*eorbit[e][56]-1*eorbit[e][59]);
+ eorbit[e][21]=(f_21-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][52])/2;
+ eorbit[e][20]=(f_20-1*eorbit[e][40]-1*eorbit[e][54]);
+ eorbit[e][19]=(f_19-1*eorbit[e][33]-2*eorbit[e][41]-1*eorbit[e][45]-2*eorbit[e][50]-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
+ eorbit[e][18]=(f_18-2*eorbit[e][32]-2*eorbit[e][38]-1*eorbit[e][44]-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][56]-2*eorbit[e][59]);
+ eorbit[e][17]=(f_17-2*eorbit[e][25]-1*eorbit[e][27]-1*eorbit[e][32]-1*eorbit[e][34]-1*eorbit[e][47])/3;
+ eorbit[e][16]=(f_16-2*eorbit[e][20]-2*eorbit[e][22]-1*eorbit[e][31]-2*eorbit[e][40]-1*eorbit[e][44]-2*eorbit[e][54])/2;
+ eorbit[e][15]=(f_15-2*eorbit[e][25]-2*eorbit[e][29]-1*eorbit[e][31]-2*eorbit[e][32]-1*eorbit[e][34]-2*eorbit[e][42]-2*eorbit[e][47]);
+ eorbit[e][14]=(f_14-1*eorbit[e][18]-2*eorbit[e][21]-1*eorbit[e][30]-2*eorbit[e][38]-1*eorbit[e][39]-2*eorbit[e][43]-1*eorbit[e][52])/2;
+ eorbit[e][13]=(f_13-2*eorbit[e][22]-2*eorbit[e][28]-1*eorbit[e][31]-1*eorbit[e][40]-2*eorbit[e][44]-2*eorbit[e][54]);
+ eorbit[e][12]=(f_12-2*eorbit[e][21]-2*eorbit[e][28]-2*eorbit[e][29]-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52]);
+ }
+ }
+
+ endTime = clock();
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
+
+ endTime_all = endTime;
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
+ }
+
+ fstream fin, fout; // input and output files
+ int GS=5;
+ string orbit_type;
+
+ int motif_counts(char* orbit_type, int graphlet_size, const char* input_filename, const char* output_filename) {
+ // open input, output files
+ if (strcmp(orbit_type, "node")!=0 && strcmp(orbit_type, "edge")!=0) {
+ cerr << "Incorrect orbit type '" << orbit_type << "'. Should be 'node' or 'edge'." << endl;
+ return 0;
+ }
+ if (graphlet_size!=4 && graphlet_size!=5) {
+ cerr << "Incorrect graphlet size " << graphlet_size << ". Should be 4 or 5." << endl;
+ return 0;
+ }
+ fin.open(input_filename, fstream::in);
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fin.fail()) {
+ cerr << "Failed to open file " << input_filename << endl;
+ return 0;
+ }
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 0;
+ }
+ // read input graph
+ fin >> n >> m;
+ int d_max=0;
+ edges = (PAIR*)malloc(m*sizeof(PAIR));
+ deg = (int*)calloc(n,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a,b;
+ fin >> a >> b;
+ if (!(0<=a && a<n) || !(0<=b && b<n)) {
+ cerr << "Node ids should be between 0 and n-1." << endl;
+ return 0;
+ }
+ if (a==b) {
+ cerr << "Self loops (edge from x to x) are not allowed." << endl;
+ return 0;
+ }
+ deg[a]++; deg[b]++;
+ edges[i]=PAIR(a,b);
+ }
+ for (int i=0;i<n;i++) d_max=max(d_max,deg[i]);
+ printf("nodes: %d\n",n);
+ printf("edges: %d\n",m);
+ printf("max degree: %d\n",d_max);
+ fin.close();
+ if ((int)(set<PAIR>(edges,edges+m).size())!=m) {
+ cerr << "Input file contains duplicate undirected edges." << endl;
+ return 0;
+ }
+ // set up adjacency matrix if it's smaller than 100MB
+ if ((int64)n*n < 100LL*1024*1024*8) {
+ adjacent = adjacent_matrix;
+ adj_matrix = (int*)calloc((n*n)/adj_chunk+1,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a=edges[i].a, b=edges[i].b;
+ adj_matrix[(a*n+b)/adj_chunk]|=(1<<((a*n+b)%adj_chunk));
+ adj_matrix[(b*n+a)/adj_chunk]|=(1<<((b*n+a)%adj_chunk));
+ }
+ } else {
+ adjacent = adjacent_list;
+ }
+ // set up adjacency, incidence lists
+ adj = (int**)malloc(n*sizeof(int*));
+ for (int i=0;i<n;i++) adj[i] = (int*)malloc(deg[i]*sizeof(int));
+ inc = (PII**)malloc(n*sizeof(PII*));
+ for (int i=0;i<n;i++) inc[i] = (PII*)malloc(deg[i]*sizeof(PII));
+ int *d = (int*)calloc(n,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a=edges[i].a, b=edges[i].b;
+ adj[a][d[a]]=b; adj[b][d[b]]=a;
+ inc[a][d[a]]=PII(b,i); inc[b][d[b]]=PII(a,i);
+ d[a]++; d[b]++;
+ }
+ for (int i=0;i<n;i++) {
+ sort(adj[i],adj[i]+deg[i]);
+ sort(inc[i],inc[i]+deg[i]);
+ }
+ // initialize orbit counts
+ orbit = (int64**)malloc(n*sizeof(int64*));
+ for (int i=0;i<n;i++) orbit[i] = (int64*)calloc(73,sizeof(int64));
+ // initialize edge orbit counts
+ eorbit = (int64**)malloc(m*sizeof(int64*));
+ for (int i=0;i<m;i++) eorbit[i] = (int64*)calloc(68,sizeof(int64));
+ return 1;
+ }
+
+ int init(int argc, char *argv[]) {
+ if (argc!=5) {
+ cerr << "Incorrect number of arguments." << endl;
+ cerr << "Usage: orca.exe [orbit type: node|edge] [graphlet size: 4/5] [graph - input file] [graphlets - output file]" << endl;
+ return 0;
+ }
+ int graphlet_size;
+ sscanf(argv[2],"%d", &graphlet_size);
+ return motif_counts(argv[1], graphlet_size, argv[3], argv[4]);
+ }
+
+ void writeResults(int g=5) {
+ int no[] = {0,0,1,4,15,73};
+ for (int i=0;i<n;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << orbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ }
+
+ void writeEdgeResults(int g=5) {
+ int no[] = {0,0,0,2,12,68};
+ for (int i=0;i<m;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << eorbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ }
+
+
+ //int main(int argc, char *argv[]) {
+ //
+ //
+ // if (!init(argc, argv)) {
+ // cerr << "Stopping!" << endl;
+ // return 0;
+ // }
+ // if (orbit_type=="node") {
+ // printf("Counting NODE orbits of graphlets on %d nodes.\n\n",GS);
+ // if (GS==4) count4();
+ // if (GS==5) count5();
+ // writeResults(GS);
+ // } else {
+ // printf("Counting EDGE orbits of graphlets on %d nodes.\n\n",GS);
+ // if (GS==4) ecount4();
+ // if (GS==5) ecount5();
+ // writeEdgeResults(GS);
+ // }
+ //
+ //
+ // return 0;
+ //}
+
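Note: the two scratch files that follow (tmp_JJOX0U87.txt and tmp_YX4O2JRL.txt) are inputs in the plain edge-list format that motif_counts above reads: a header line "n m" (node and edge counts), then m lines of 0-indexed endpoint pairs, with no self-loops and no duplicate undirected edges. A minimal sketch of producing such a file from a networkx graph is given below; the helper name and the use of networkx are illustrative assumptions, not part of this repository.

import networkx as nx

def write_orca_input(graph: nx.Graph, path: str) -> None:
    # motif_counts expects node ids 0..n-1, so relabel first (illustrative helper, not repo code).
    graph = nx.convert_node_labels_to_integers(graph)
    with open(path, "w") as f:
        # header: node count and edge count
        f.write(f"{graph.number_of_nodes()} {graph.number_of_edges()}\n")
        # one undirected edge per line
        for u, v in graph.edges():
            f.write(f"{u} {v}\n")

For example, tmp_JJOX0U87.txt below starts with "9 24", i.e. a 9-node graph with 24 edges.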
analysis/orca/tmp_JJOX0U87.txt ADDED
@@ -0,0 +1,25 @@
+ 9 24
+ 0 1
+ 0 2
+ 0 5
+ 0 6
+ 0 7
+ 1 4
+ 1 6
+ 1 8
+ 2 3
+ 2 4
+ 2 6
+ 2 8
+ 3 4
+ 3 5
+ 3 7
+ 4 5
+ 4 7
+ 4 8
+ 5 6
+ 5 7
+ 5 8
+ 6 7
+ 6 8
+ 7 8
analysis/orca/tmp_YX4O2JRL.txt ADDED
@@ -0,0 +1,3269 @@
1
+ 217 3268
2
+ 0 1
3
+ 0 2
4
+ 0 4
5
+ 0 8
6
+ 0 9
7
+ 0 11
8
+ 0 13
9
+ 0 16
10
+ 0 17
11
+ 0 30
12
+ 0 33
13
+ 0 35
14
+ 0 36
15
+ 0 42
16
+ 0 47
17
+ 0 50
18
+ 0 51
19
+ 0 52
20
+ 0 53
21
+ 0 54
22
+ 0 55
23
+ 0 56
24
+ 0 57
25
+ 0 58
26
+ 1 2
27
+ 1 3
28
+ 1 4
29
+ 1 7
30
+ 1 8
31
+ 1 9
32
+ 1 10
33
+ 1 11
34
+ 1 12
35
+ 1 13
36
+ 1 14
37
+ 1 15
38
+ 1 16
39
+ 1 17
40
+ 1 18
41
+ 1 19
42
+ 1 20
43
+ 1 21
44
+ 1 22
45
+ 1 23
46
+ 1 24
47
+ 1 25
48
+ 1 27
49
+ 1 28
50
+ 1 29
51
+ 1 30
52
+ 1 32
53
+ 1 33
54
+ 1 34
55
+ 1 35
56
+ 1 36
57
+ 1 37
58
+ 1 38
59
+ 1 39
60
+ 1 40
61
+ 1 41
62
+ 1 42
63
+ 1 43
64
+ 1 46
65
+ 1 47
66
+ 1 48
67
+ 1 49
68
+ 1 50
69
+ 1 51
70
+ 1 52
71
+ 1 53
72
+ 1 54
73
+ 1 55
74
+ 1 57
75
+ 1 62
76
+ 1 64
77
+ 1 69
78
+ 1 80
79
+ 1 95
80
+ 1 97
81
+ 1 98
82
+ 1 99
83
+ 1 100
84
+ 1 101
85
+ 1 103
86
+ 1 104
87
+ 1 105
88
+ 1 106
89
+ 1 107
90
+ 1 108
91
+ 1 111
92
+ 1 112
93
+ 1 115
94
+ 1 116
95
+ 1 117
96
+ 1 118
97
+ 1 120
98
+ 1 122
99
+ 1 124
100
+ 1 127
101
+ 1 128
102
+ 1 137
103
+ 1 162
104
+ 1 164
105
+ 1 165
106
+ 1 197
107
+ 2 3
108
+ 2 4
109
+ 2 5
110
+ 2 8
111
+ 2 9
112
+ 2 10
113
+ 2 11
114
+ 2 12
115
+ 2 13
116
+ 2 14
117
+ 2 16
118
+ 2 17
119
+ 2 18
120
+ 2 19
121
+ 2 20
122
+ 2 21
123
+ 2 22
124
+ 2 23
125
+ 2 24
126
+ 2 25
127
+ 2 27
128
+ 2 28
129
+ 2 29
130
+ 2 30
131
+ 2 31
132
+ 2 33
133
+ 2 35
134
+ 2 36
135
+ 2 37
136
+ 2 38
137
+ 2 39
138
+ 2 41
139
+ 2 42
140
+ 2 43
141
+ 2 44
142
+ 2 45
143
+ 2 46
144
+ 2 47
145
+ 2 48
146
+ 2 49
147
+ 2 50
148
+ 2 51
149
+ 2 52
150
+ 2 53
151
+ 2 54
152
+ 2 55
153
+ 2 56
154
+ 2 57
155
+ 2 59
156
+ 2 60
157
+ 2 62
158
+ 2 63
159
+ 2 64
160
+ 2 65
161
+ 2 66
162
+ 2 69
163
+ 2 80
164
+ 2 95
165
+ 2 96
166
+ 2 97
167
+ 2 98
168
+ 2 99
169
+ 2 100
170
+ 2 101
171
+ 2 103
172
+ 2 104
173
+ 2 105
174
+ 2 106
175
+ 2 107
176
+ 2 108
177
+ 2 110
178
+ 2 111
179
+ 2 112
180
+ 2 113
181
+ 2 114
182
+ 2 115
183
+ 2 116
184
+ 2 117
185
+ 2 118
186
+ 2 119
187
+ 2 120
188
+ 2 121
189
+ 2 122
190
+ 2 123
191
+ 2 126
192
+ 2 127
193
+ 2 128
194
+ 2 129
195
+ 2 131
196
+ 2 135
197
+ 2 136
198
+ 2 137
199
+ 2 154
200
+ 2 158
201
+ 2 162
202
+ 2 164
203
+ 2 175
204
+ 2 202
205
+ 3 6
206
+ 3 7
207
+ 3 8
208
+ 3 11
209
+ 3 13
210
+ 3 14
211
+ 3 15
212
+ 3 16
213
+ 3 17
214
+ 3 18
215
+ 3 19
216
+ 3 20
217
+ 3 25
218
+ 3 26
219
+ 3 27
220
+ 3 29
221
+ 3 31
222
+ 3 32
223
+ 3 33
224
+ 3 34
225
+ 3 35
226
+ 3 37
227
+ 3 38
228
+ 3 39
229
+ 3 40
230
+ 3 41
231
+ 3 43
232
+ 3 44
233
+ 3 46
234
+ 3 47
235
+ 3 48
236
+ 3 49
237
+ 3 50
238
+ 3 51
239
+ 3 52
240
+ 3 54
241
+ 3 55
242
+ 3 56
243
+ 3 57
244
+ 3 69
245
+ 3 80
246
+ 3 95
247
+ 3 99
248
+ 3 100
249
+ 3 101
250
+ 3 102
251
+ 3 103
252
+ 3 104
253
+ 3 105
254
+ 3 107
255
+ 3 108
256
+ 3 111
257
+ 3 112
258
+ 3 114
259
+ 3 115
260
+ 3 116
261
+ 3 118
262
+ 3 121
263
+ 3 122
264
+ 3 124
265
+ 3 126
266
+ 3 128
267
+ 3 129
268
+ 3 130
269
+ 3 131
270
+ 3 136
271
+ 4 5
272
+ 4 7
273
+ 4 8
274
+ 4 9
275
+ 4 10
276
+ 4 11
277
+ 4 15
278
+ 4 16
279
+ 4 17
280
+ 4 20
281
+ 4 21
282
+ 4 25
283
+ 4 27
284
+ 4 28
285
+ 4 30
286
+ 4 32
287
+ 4 33
288
+ 4 34
289
+ 4 35
290
+ 4 36
291
+ 4 37
292
+ 4 38
293
+ 4 39
294
+ 4 41
295
+ 4 43
296
+ 4 44
297
+ 4 45
298
+ 4 46
299
+ 4 47
300
+ 4 49
301
+ 4 50
302
+ 4 51
303
+ 4 53
304
+ 4 54
305
+ 4 55
306
+ 4 56
307
+ 4 57
308
+ 4 59
309
+ 4 60
310
+ 4 61
311
+ 4 63
312
+ 4 65
313
+ 4 67
314
+ 4 69
315
+ 4 70
316
+ 4 80
317
+ 4 85
318
+ 4 87
319
+ 4 91
320
+ 4 92
321
+ 4 93
322
+ 4 94
323
+ 4 97
324
+ 4 98
325
+ 4 101
326
+ 4 103
327
+ 4 104
328
+ 4 105
329
+ 4 106
330
+ 4 107
331
+ 4 108
332
+ 4 110
333
+ 4 111
334
+ 4 112
335
+ 4 115
336
+ 4 116
337
+ 4 117
338
+ 4 118
339
+ 4 119
340
+ 4 120
341
+ 4 122
342
+ 4 123
343
+ 4 127
344
+ 4 128
345
+ 4 129
346
+ 4 130
347
+ 4 137
348
+ 4 138
349
+ 4 139
350
+ 4 140
351
+ 4 142
352
+ 4 146
353
+ 4 147
354
+ 4 148
355
+ 4 150
356
+ 4 151
357
+ 4 152
358
+ 4 154
359
+ 4 155
360
+ 4 156
361
+ 4 157
362
+ 4 164
363
+ 4 175
364
+ 4 176
365
+ 4 196
366
+ 4 197
367
+ 4 202
368
+ 5 8
369
+ 5 11
370
+ 5 13
371
+ 5 16
372
+ 5 17
373
+ 5 20
374
+ 5 25
375
+ 5 27
376
+ 5 30
377
+ 5 32
378
+ 5 33
379
+ 5 35
380
+ 5 36
381
+ 5 37
382
+ 5 38
383
+ 5 42
384
+ 5 43
385
+ 5 45
386
+ 5 46
387
+ 5 47
388
+ 5 50
389
+ 5 51
390
+ 5 53
391
+ 5 54
392
+ 5 55
393
+ 5 56
394
+ 5 57
395
+ 5 60
396
+ 5 64
397
+ 5 88
398
+ 5 98
399
+ 5 99
400
+ 5 101
401
+ 5 104
402
+ 5 105
403
+ 5 106
404
+ 5 107
405
+ 5 110
406
+ 5 116
407
+ 5 117
408
+ 5 119
409
+ 5 120
410
+ 5 122
411
+ 5 123
412
+ 5 137
413
+ 5 164
414
+ 5 202
415
+ 5 204
416
+ 6 7
417
+ 6 15
418
+ 6 16
419
+ 6 20
420
+ 6 21
421
+ 6 22
422
+ 6 23
423
+ 6 28
424
+ 6 29
425
+ 6 31
426
+ 6 32
427
+ 6 33
428
+ 6 34
429
+ 6 37
430
+ 6 39
431
+ 6 41
432
+ 6 51
433
+ 6 101
434
+ 6 103
435
+ 6 116
436
+ 6 131
437
+ 6 135
438
+ 6 136
439
+ 7 8
440
+ 7 10
441
+ 7 11
442
+ 7 12
443
+ 7 13
444
+ 7 14
445
+ 7 16
446
+ 7 17
447
+ 7 18
448
+ 7 19
449
+ 7 20
450
+ 7 21
451
+ 7 22
452
+ 7 23
453
+ 7 24
454
+ 7 25
455
+ 7 26
456
+ 7 27
457
+ 7 28
458
+ 7 29
459
+ 7 30
460
+ 7 31
461
+ 7 33
462
+ 7 35
463
+ 7 36
464
+ 7 37
465
+ 7 38
466
+ 7 39
467
+ 7 40
468
+ 7 41
469
+ 7 42
470
+ 7 43
471
+ 7 46
472
+ 7 47
473
+ 7 48
474
+ 7 49
475
+ 7 50
476
+ 7 51
477
+ 7 52
478
+ 7 55
479
+ 7 56
480
+ 7 57
481
+ 7 60
482
+ 7 63
483
+ 7 64
484
+ 7 65
485
+ 7 69
486
+ 7 97
487
+ 7 101
488
+ 7 103
489
+ 7 104
490
+ 7 105
491
+ 7 108
492
+ 7 112
493
+ 7 115
494
+ 7 116
495
+ 7 118
496
+ 7 122
497
+ 7 124
498
+ 7 126
499
+ 7 127
500
+ 7 129
501
+ 7 130
502
+ 7 131
503
+ 7 135
504
+ 7 136
505
+ 7 137
506
+ 8 9
507
+ 8 10
508
+ 8 11
509
+ 8 12
510
+ 8 13
511
+ 8 16
512
+ 8 17
513
+ 8 20
514
+ 8 21
515
+ 8 22
516
+ 8 25
517
+ 8 27
518
+ 8 28
519
+ 8 30
520
+ 8 32
521
+ 8 33
522
+ 8 35
523
+ 8 36
524
+ 8 37
525
+ 8 38
526
+ 8 39
527
+ 8 41
528
+ 8 42
529
+ 8 43
530
+ 8 44
531
+ 8 45
532
+ 8 46
533
+ 8 47
534
+ 8 48
535
+ 8 49
536
+ 8 50
537
+ 8 51
538
+ 8 53
539
+ 8 54
540
+ 8 55
541
+ 8 56
542
+ 8 57
543
+ 8 60
544
+ 8 62
545
+ 8 64
546
+ 8 66
547
+ 8 68
548
+ 8 95
549
+ 8 97
550
+ 8 98
551
+ 8 99
552
+ 8 101
553
+ 8 102
554
+ 8 104
555
+ 8 105
556
+ 8 106
557
+ 8 107
558
+ 8 108
559
+ 8 110
560
+ 8 115
561
+ 8 116
562
+ 8 117
563
+ 8 119
564
+ 8 120
565
+ 8 123
566
+ 8 126
567
+ 8 137
568
+ 8 164
569
+ 8 165
570
+ 8 175
571
+ 8 177
572
+ 9 10
573
+ 9 13
574
+ 9 16
575
+ 9 17
576
+ 9 20
577
+ 9 25
578
+ 9 28
579
+ 9 30
580
+ 9 33
581
+ 9 35
582
+ 9 36
583
+ 9 37
584
+ 9 38
585
+ 9 39
586
+ 9 42
587
+ 9 47
588
+ 9 49
589
+ 9 53
590
+ 9 55
591
+ 9 56
592
+ 9 57
593
+ 9 58
594
+ 9 62
595
+ 9 64
596
+ 9 101
597
+ 9 108
598
+ 9 110
599
+ 9 116
600
+ 9 117
601
+ 9 118
602
+ 9 120
603
+ 9 127
604
+ 9 137
605
+ 10 12
606
+ 10 13
607
+ 10 14
608
+ 10 16
609
+ 10 17
610
+ 10 18
611
+ 10 19
612
+ 10 20
613
+ 10 25
614
+ 10 30
615
+ 10 32
616
+ 10 33
617
+ 10 36
618
+ 10 37
619
+ 10 38
620
+ 10 39
621
+ 10 41
622
+ 10 42
623
+ 10 46
624
+ 10 47
625
+ 10 48
626
+ 10 51
627
+ 10 53
628
+ 10 55
629
+ 10 56
630
+ 10 57
631
+ 10 62
632
+ 10 64
633
+ 10 95
634
+ 10 97
635
+ 10 101
636
+ 10 103
637
+ 10 116
638
+ 10 126
639
+ 10 127
640
+ 10 165
641
+ 10 208
642
+ 11 13
643
+ 11 15
644
+ 11 16
645
+ 11 17
646
+ 11 18
647
+ 11 20
648
+ 11 21
649
+ 11 22
650
+ 11 23
651
+ 11 25
652
+ 11 28
653
+ 11 30
654
+ 11 33
655
+ 11 35
656
+ 11 36
657
+ 11 37
658
+ 11 38
659
+ 11 41
660
+ 11 42
661
+ 11 43
662
+ 11 45
663
+ 11 46
664
+ 11 47
665
+ 11 48
666
+ 11 49
667
+ 11 50
668
+ 11 51
669
+ 11 53
670
+ 11 54
671
+ 11 55
672
+ 11 56
673
+ 11 57
674
+ 11 97
675
+ 11 99
676
+ 11 103
677
+ 11 104
678
+ 11 105
679
+ 11 106
680
+ 11 107
681
+ 11 108
682
+ 11 110
683
+ 11 112
684
+ 11 117
685
+ 11 119
686
+ 11 120
687
+ 11 123
688
+ 11 131
689
+ 11 164
690
+ 11 175
691
+ 12 15
692
+ 12 16
693
+ 12 21
694
+ 12 23
695
+ 12 25
696
+ 12 33
697
+ 12 35
698
+ 12 36
699
+ 12 37
700
+ 12 38
701
+ 12 43
702
+ 12 47
703
+ 12 49
704
+ 12 50
705
+ 12 51
706
+ 12 53
707
+ 12 56
708
+ 12 97
709
+ 12 101
710
+ 12 104
711
+ 12 106
712
+ 12 110
713
+ 12 117
714
+ 13 15
715
+ 13 16
716
+ 13 17
717
+ 13 18
718
+ 13 20
719
+ 13 21
720
+ 13 22
721
+ 13 23
722
+ 13 25
723
+ 13 27
724
+ 13 28
725
+ 13 30
726
+ 13 32
727
+ 13 33
728
+ 13 34
729
+ 13 35
730
+ 13 36
731
+ 13 37
732
+ 13 38
733
+ 13 39
734
+ 13 41
735
+ 13 43
736
+ 13 44
737
+ 13 45
738
+ 13 46
739
+ 13 47
740
+ 13 48
741
+ 13 49
742
+ 13 50
743
+ 13 51
744
+ 13 52
745
+ 13 53
746
+ 13 54
747
+ 13 55
748
+ 13 56
749
+ 13 57
750
+ 13 60
751
+ 13 61
752
+ 13 63
753
+ 13 65
754
+ 13 69
755
+ 13 80
756
+ 13 87
757
+ 13 95
758
+ 13 97
759
+ 13 98
760
+ 13 99
761
+ 13 100
762
+ 13 101
763
+ 13 103
764
+ 13 104
765
+ 13 105
766
+ 13 106
767
+ 13 107
768
+ 13 108
769
+ 13 110
770
+ 13 111
771
+ 13 112
772
+ 13 115
773
+ 13 116
774
+ 13 117
775
+ 13 118
776
+ 13 119
777
+ 13 120
778
+ 13 121
779
+ 13 122
780
+ 13 123
781
+ 13 126
782
+ 13 127
783
+ 13 128
784
+ 13 137
785
+ 13 154
786
+ 13 155
787
+ 13 156
788
+ 13 157
789
+ 13 164
790
+ 13 175
791
+ 13 196
792
+ 14 15
793
+ 14 16
794
+ 14 19
795
+ 14 20
796
+ 14 21
797
+ 14 29
798
+ 14 30
799
+ 14 31
800
+ 14 32
801
+ 14 34
802
+ 14 37
803
+ 14 51
804
+ 14 57
805
+ 14 101
806
+ 14 103
807
+ 14 116
808
+ 14 118
809
+ 14 122
810
+ 14 126
811
+ 14 127
812
+ 14 162
813
+ 15 16
814
+ 15 17
815
+ 15 18
816
+ 15 19
817
+ 15 20
818
+ 15 21
819
+ 15 22
820
+ 15 23
821
+ 15 24
822
+ 15 25
823
+ 15 26
824
+ 15 27
825
+ 15 28
826
+ 15 29
827
+ 15 30
828
+ 15 31
829
+ 15 33
830
+ 15 35
831
+ 15 37
832
+ 15 38
833
+ 15 39
834
+ 15 40
835
+ 15 41
836
+ 15 42
837
+ 15 43
838
+ 15 46
839
+ 15 47
840
+ 15 49
841
+ 15 50
842
+ 15 51
843
+ 15 54
844
+ 15 56
845
+ 15 60
846
+ 15 63
847
+ 15 64
848
+ 15 68
849
+ 15 69
850
+ 15 97
851
+ 15 99
852
+ 15 101
853
+ 15 102
854
+ 15 103
855
+ 15 104
856
+ 15 105
857
+ 15 108
858
+ 15 112
859
+ 15 114
860
+ 15 115
861
+ 15 116
862
+ 15 118
863
+ 15 122
864
+ 15 126
865
+ 15 127
866
+ 15 128
867
+ 15 129
868
+ 15 130
869
+ 15 131
870
+ 15 135
871
+ 15 136
872
+ 15 158
873
+ 16 17
874
+ 16 18
875
+ 16 19
876
+ 16 20
877
+ 16 21
878
+ 16 22
879
+ 16 23
880
+ 16 24
881
+ 16 25
882
+ 16 26
883
+ 16 27
884
+ 16 28
885
+ 16 29
886
+ 16 30
887
+ 16 31
888
+ 16 32
889
+ 16 33
890
+ 16 34
891
+ 16 35
892
+ 16 36
893
+ 16 37
894
+ 16 38
895
+ 16 39
896
+ 16 40
897
+ 16 41
898
+ 16 42
899
+ 16 43
900
+ 16 44
901
+ 16 45
902
+ 16 46
903
+ 16 47
904
+ 16 48
905
+ 16 49
906
+ 16 50
907
+ 16 51
908
+ 16 52
909
+ 16 53
910
+ 16 54
911
+ 16 55
912
+ 16 56
913
+ 16 57
914
+ 16 60
915
+ 16 62
916
+ 16 64
917
+ 16 65
918
+ 16 66
919
+ 16 68
920
+ 16 69
921
+ 16 70
922
+ 16 71
923
+ 16 73
924
+ 16 74
925
+ 16 80
926
+ 16 81
927
+ 16 87
928
+ 16 91
929
+ 16 92
930
+ 16 93
931
+ 16 94
932
+ 16 95
933
+ 16 96
934
+ 16 97
935
+ 16 98
936
+ 16 99
937
+ 16 100
938
+ 16 101
939
+ 16 102
940
+ 16 103
941
+ 16 104
942
+ 16 105
943
+ 16 106
944
+ 16 107
945
+ 16 108
946
+ 16 109
947
+ 16 110
948
+ 16 111
949
+ 16 112
950
+ 16 113
951
+ 16 114
952
+ 16 115
953
+ 16 116
954
+ 16 117
955
+ 16 118
956
+ 16 119
957
+ 16 120
958
+ 16 121
959
+ 16 122
960
+ 16 123
961
+ 16 124
962
+ 16 126
963
+ 16 127
964
+ 16 128
965
+ 16 129
966
+ 16 131
967
+ 16 133
968
+ 16 134
969
+ 16 137
970
+ 16 138
971
+ 16 139
972
+ 16 140
973
+ 16 142
974
+ 16 145
975
+ 16 146
976
+ 16 147
977
+ 16 148
978
+ 16 149
979
+ 16 150
980
+ 16 151
981
+ 16 152
982
+ 16 154
983
+ 16 160
984
+ 16 162
985
+ 16 164
986
+ 16 165
987
+ 16 175
988
+ 16 176
989
+ 16 177
990
+ 16 183
991
+ 16 197
992
+ 16 202
993
+ 16 204
994
+ 16 214
995
+ 17 18
996
+ 17 19
997
+ 17 20
998
+ 17 21
999
+ 17 22
1000
+ 17 23
1001
+ 17 25
1002
+ 17 27
1003
+ 17 28
1004
+ 17 29
1005
+ 17 30
1006
+ 17 31
1007
+ 17 32
1008
+ 17 33
1009
+ 17 34
1010
+ 17 35
1011
+ 17 36
1012
+ 17 37
1013
+ 17 38
1014
+ 17 39
1015
+ 17 40
1016
+ 17 41
1017
+ 17 42
1018
+ 17 43
1019
+ 17 47
1020
+ 17 50
1021
+ 17 51
1022
+ 17 53
1023
+ 17 54
1024
+ 17 55
1025
+ 17 56
1026
+ 17 57
1027
+ 17 60
1028
+ 17 62
1029
+ 17 64
1030
+ 17 95
1031
+ 17 97
1032
+ 17 98
1033
+ 17 99
1034
+ 17 101
1035
+ 17 103
1036
+ 17 104
1037
+ 17 105
1038
+ 17 106
1039
+ 17 110
1040
+ 17 112
1041
+ 17 115
1042
+ 17 116
1043
+ 17 117
1044
+ 17 118
1045
+ 17 122
1046
+ 17 127
1047
+ 17 128
1048
+ 17 129
1049
+ 17 131
1050
+ 17 137
1051
+ 17 164
1052
+ 17 165
1053
+ 17 197
1054
+ 18 21
1055
+ 18 22
1056
+ 18 23
1057
+ 18 25
1058
+ 18 28
1059
+ 18 30
1060
+ 18 32
1061
+ 18 33
1062
+ 18 35
1063
+ 18 36
1064
+ 18 38
1065
+ 18 43
1066
+ 18 46
1067
+ 18 47
1068
+ 18 48
1069
+ 18 49
1070
+ 18 53
1071
+ 18 55
1072
+ 18 57
1073
+ 18 95
1074
+ 18 97
1075
+ 18 104
1076
+ 18 106
1077
+ 18 107
1078
+ 18 108
1079
+ 19 21
1080
+ 19 22
1081
+ 19 23
1082
+ 19 25
1083
+ 19 27
1084
+ 19 28
1085
+ 19 30
1086
+ 19 32
1087
+ 19 33
1088
+ 19 34
1089
+ 19 35
1090
+ 19 38
1091
+ 19 40
1092
+ 19 41
1093
+ 19 47
1094
+ 19 50
1095
+ 19 51
1096
+ 19 101
1097
+ 19 103
1098
+ 19 112
1099
+ 19 116
1100
+ 19 126
1101
+ 19 129
1102
+ 19 158
1103
+ 19 168
1104
+ 19 170
1105
+ 20 21
1106
+ 20 22
1107
+ 20 23
1108
+ 20 25
1109
+ 20 27
1110
+ 20 28
1111
+ 20 30
1112
+ 20 31
1113
+ 20 32
1114
+ 20 33
1115
+ 20 34
1116
+ 20 35
1117
+ 20 36
1118
+ 20 38
1119
+ 20 39
1120
+ 20 40
1121
+ 20 41
1122
+ 20 42
1123
+ 20 43
1124
+ 20 44
1125
+ 20 47
1126
+ 20 50
1127
+ 20 51
1128
+ 20 53
1129
+ 20 54
1130
+ 20 55
1131
+ 20 56
1132
+ 20 57
1133
+ 20 59
1134
+ 20 60
1135
+ 20 62
1136
+ 20 63
1137
+ 20 64
1138
+ 20 65
1139
+ 20 66
1140
+ 20 69
1141
+ 20 80
1142
+ 20 85
1143
+ 20 87
1144
+ 20 97
1145
+ 20 98
1146
+ 20 99
1147
+ 20 100
1148
+ 20 101
1149
+ 20 103
1150
+ 20 104
1151
+ 20 105
1152
+ 20 106
1153
+ 20 110
1154
+ 20 111
1155
+ 20 116
1156
+ 20 117
1157
+ 20 120
1158
+ 20 121
1159
+ 20 123
1160
+ 20 126
1161
+ 20 131
1162
+ 20 137
1163
+ 20 161
1164
+ 21 24
1165
+ 21 25
1166
+ 21 26
1167
+ 21 27
1168
+ 21 29
1169
+ 21 31
1170
+ 21 32
1171
+ 21 33
1172
+ 21 34
1173
+ 21 35
1174
+ 21 36
1175
+ 21 37
1176
+ 21 38
1177
+ 21 39
1178
+ 21 40
1179
+ 21 41
1180
+ 21 42
1181
+ 21 43
1182
+ 21 44
1183
+ 21 46
1184
+ 21 47
1185
+ 21 48
1186
+ 21 49
1187
+ 21 50
1188
+ 21 51
1189
+ 21 52
1190
+ 21 54
1191
+ 21 55
1192
+ 21 57
1193
+ 21 64
1194
+ 21 69
1195
+ 21 80
1196
+ 21 96
1197
+ 21 97
1198
+ 21 99
1199
+ 21 100
1200
+ 21 101
1201
+ 21 103
1202
+ 21 104
1203
+ 21 105
1204
+ 21 107
1205
+ 21 108
1206
+ 21 111
1207
+ 21 112
1208
+ 21 113
1209
+ 21 114
1210
+ 21 115
1211
+ 21 116
1212
+ 21 117
1213
+ 21 118
1214
+ 21 121
1215
+ 21 122
1216
+ 21 124
1217
+ 21 126
1218
+ 21 128
1219
+ 21 129
1220
+ 21 130
1221
+ 21 131
1222
+ 21 135
1223
+ 21 136
1224
+ 21 164
1225
+ 22 25
1226
+ 22 26
1227
+ 22 27
1228
+ 22 29
1229
+ 22 31
1230
+ 22 32
1231
+ 22 33
1232
+ 22 34
1233
+ 22 35
1234
+ 22 37
1235
+ 22 38
1236
+ 22 39
1237
+ 22 40
1238
+ 22 41
1239
+ 22 43
1240
+ 22 44
1241
+ 22 46
1242
+ 22 47
1243
+ 22 48
1244
+ 22 49
1245
+ 22 50
1246
+ 22 51
1247
+ 22 55
1248
+ 22 57
1249
+ 22 64
1250
+ 22 69
1251
+ 22 80
1252
+ 22 95
1253
+ 22 97
1254
+ 22 99
1255
+ 22 100
1256
+ 22 101
1257
+ 22 103
1258
+ 22 104
1259
+ 22 105
1260
+ 22 107
1261
+ 22 108
1262
+ 22 111
1263
+ 22 112
1264
+ 22 114
1265
+ 22 115
1266
+ 22 116
1267
+ 22 117
1268
+ 22 121
1269
+ 22 122
1270
+ 22 126
1271
+ 22 128
1272
+ 22 129
1273
+ 22 131
1274
+ 22 136
1275
+ 23 24
1276
+ 23 25
1277
+ 23 26
1278
+ 23 27
1279
+ 23 29
1280
+ 23 31
1281
+ 23 32
1282
+ 23 33
1283
+ 23 34
1284
+ 23 35
1285
+ 23 36
1286
+ 23 37
1287
+ 23 38
1288
+ 23 39
1289
+ 23 40
1290
+ 23 41
1291
+ 23 43
1292
+ 23 46
1293
+ 23 47
1294
+ 23 48
1295
+ 23 50
1296
+ 23 51
1297
+ 23 54
1298
+ 23 59
1299
+ 23 63
1300
+ 23 65
1301
+ 23 68
1302
+ 23 69
1303
+ 23 80
1304
+ 23 83
1305
+ 23 85
1306
+ 23 97
1307
+ 23 99
1308
+ 23 100
1309
+ 23 101
1310
+ 23 102
1311
+ 23 103
1312
+ 23 104
1313
+ 23 105
1314
+ 23 107
1315
+ 23 108
1316
+ 23 111
1317
+ 23 112
1318
+ 23 114
1319
+ 23 115
1320
+ 23 116
1321
+ 23 117
1322
+ 23 118
1323
+ 23 121
1324
+ 23 122
1325
+ 23 124
1326
+ 23 126
1327
+ 23 127
1328
+ 23 128
1329
+ 23 129
1330
+ 23 130
1331
+ 23 131
1332
+ 23 135
1333
+ 23 136
1334
+ 23 158
1335
+ 23 162
1336
+ 24 27
1337
+ 24 28
1338
+ 24 29
1339
+ 24 31
1340
+ 24 32
1341
+ 24 34
1342
+ 24 35
1343
+ 24 39
1344
+ 24 40
1345
+ 24 43
1346
+ 24 51
1347
+ 24 59
1348
+ 24 63
1349
+ 24 65
1350
+ 24 69
1351
+ 24 80
1352
+ 24 85
1353
+ 24 97
1354
+ 24 101
1355
+ 24 103
1356
+ 24 111
1357
+ 24 114
1358
+ 24 115
1359
+ 24 116
1360
+ 24 122
1361
+ 24 126
1362
+ 24 130
1363
+ 24 161
1364
+ 25 27
1365
+ 25 28
1366
+ 25 29
1367
+ 25 30
1368
+ 25 31
1369
+ 25 32
1370
+ 25 33
1371
+ 25 34
1372
+ 25 35
1373
+ 25 36
1374
+ 25 37
1375
+ 25 38
1376
+ 25 39
1377
+ 25 40
1378
+ 25 41
1379
+ 25 42
1380
+ 25 43
1381
+ 25 44
1382
+ 25 45
1383
+ 25 46
1384
+ 25 47
1385
+ 25 48
1386
+ 25 49
1387
+ 25 50
1388
+ 25 51
1389
+ 25 53
1390
+ 25 54
1391
+ 25 55
1392
+ 25 56
1393
+ 25 57
1394
+ 25 60
1395
+ 25 62
1396
+ 25 63
1397
+ 25 64
1398
+ 25 65
1399
+ 25 66
1400
+ 25 68
1401
+ 25 69
1402
+ 25 71
1403
+ 25 80
1404
+ 25 87
1405
+ 25 91
1406
+ 25 92
1407
+ 25 94
1408
+ 25 97
1409
+ 25 98
1410
+ 25 99
1411
+ 25 100
1412
+ 25 102
1413
+ 25 103
1414
+ 25 104
1415
+ 25 105
1416
+ 25 106
1417
+ 25 107
1418
+ 25 108
1419
+ 25 110
1420
+ 25 111
1421
+ 25 112
1422
+ 25 114
1423
+ 25 115
1424
+ 25 116
1425
+ 25 117
1426
+ 25 118
1427
+ 25 119
1428
+ 25 120
1429
+ 25 121
1430
+ 25 122
1431
+ 25 123
1432
+ 25 124
1433
+ 25 126
1434
+ 25 127
1435
+ 25 128
1436
+ 25 129
1437
+ 25 131
1438
+ 25 137
1439
+ 25 139
1440
+ 25 150
1441
+ 25 151
1442
+ 25 152
1443
+ 25 154
1444
+ 25 164
1445
+ 25 165
1446
+ 25 175
1447
+ 25 176
1448
+ 25 202
1449
+ 25 204
1450
+ 25 214
1451
+ 26 28
1452
+ 26 31
1453
+ 26 32
1454
+ 26 33
1455
+ 26 34
1456
+ 26 38
1457
+ 26 39
1458
+ 26 41
1459
+ 26 48
1460
+ 26 51
1461
+ 26 68
1462
+ 26 112
1463
+ 26 131
1464
+ 26 135
1465
+ 26 136
1466
+ 27 28
1467
+ 27 30
1468
+ 27 32
1469
+ 27 33
1470
+ 27 34
1471
+ 27 35
1472
+ 27 37
1473
+ 27 38
1474
+ 27 41
1475
+ 27 42
1476
+ 27 43
1477
+ 27 46
1478
+ 27 47
1479
+ 27 50
1480
+ 27 51
1481
+ 27 64
1482
+ 27 80
1483
+ 27 97
1484
+ 27 98
1485
+ 27 101
1486
+ 27 102
1487
+ 27 103
1488
+ 27 105
1489
+ 27 106
1490
+ 27 107
1491
+ 27 108
1492
+ 27 111
1493
+ 27 112
1494
+ 27 115
1495
+ 27 116
1496
+ 27 118
1497
+ 27 122
1498
+ 27 126
1499
+ 27 127
1500
+ 27 128
1501
+ 27 129
1502
+ 27 131
1503
+ 28 29
1504
+ 28 31
1505
+ 28 32
1506
+ 28 33
1507
+ 28 34
1508
+ 28 35
1509
+ 28 36
1510
+ 28 37
1511
+ 28 38
1512
+ 28 39
1513
+ 28 40
1514
+ 28 41
1515
+ 28 42
1516
+ 28 43
1517
+ 28 44
1518
+ 28 46
1519
+ 28 47
1520
+ 28 48
1521
+ 28 49
1522
+ 28 50
1523
+ 28 51
1524
+ 28 52
1525
+ 28 54
1526
+ 28 55
1527
+ 28 56
1528
+ 28 57
1529
+ 28 64
1530
+ 28 68
1531
+ 28 69
1532
+ 28 80
1533
+ 28 95
1534
+ 28 96
1535
+ 28 97
1536
+ 28 99
1537
+ 28 100
1538
+ 28 101
1539
+ 28 102
1540
+ 28 103
1541
+ 28 104
1542
+ 28 105
1543
+ 28 107
1544
+ 28 108
1545
+ 28 111
1546
+ 28 112
1547
+ 28 113
1548
+ 28 114
1549
+ 28 115
1550
+ 28 116
1551
+ 28 117
1552
+ 28 120
1553
+ 28 121
1554
+ 28 122
1555
+ 28 124
1556
+ 28 126
1557
+ 28 128
1558
+ 28 129
1559
+ 28 131
1560
+ 28 135
1561
+ 28 136
1562
+ 28 164
1563
+ 28 175
1564
+ 29 30
1565
+ 29 31
1566
+ 29 32
1567
+ 29 33
1568
+ 29 34
1569
+ 29 35
1570
+ 29 36
1571
+ 29 39
1572
+ 29 40
1573
+ 29 41
1574
+ 29 47
1575
+ 29 50
1576
+ 29 51
1577
+ 29 101
1578
+ 29 103
1579
+ 29 112
1580
+ 29 115
1581
+ 29 116
1582
+ 29 126
1583
+ 29 129
1584
+ 29 158
1585
+ 29 168
1586
+ 30 32
1587
+ 30 33
1588
+ 30 35
1589
+ 30 36
1590
+ 30 37
1591
+ 30 38
1592
+ 30 39
1593
+ 30 41
1594
+ 30 42
1595
+ 30 43
1596
+ 30 45
1597
+ 30 47
1598
+ 30 50
1599
+ 30 51
1600
+ 30 53
1601
+ 30 54
1602
+ 30 55
1603
+ 30 56
1604
+ 30 57
1605
+ 30 59
1606
+ 30 60
1607
+ 30 62
1608
+ 30 63
1609
+ 30 64
1610
+ 30 65
1611
+ 30 68
1612
+ 30 69
1613
+ 30 80
1614
+ 30 85
1615
+ 30 87
1616
+ 30 95
1617
+ 30 97
1618
+ 30 98
1619
+ 30 101
1620
+ 30 103
1621
+ 30 104
1622
+ 30 105
1623
+ 30 106
1624
+ 30 107
1625
+ 30 108
1626
+ 30 110
1627
+ 30 111
1628
+ 30 112
1629
+ 30 115
1630
+ 30 116
1631
+ 30 117
1632
+ 30 118
1633
+ 30 119
1634
+ 30 120
1635
+ 30 122
1636
+ 30 123
1637
+ 30 126
1638
+ 30 127
1639
+ 30 128
1640
+ 30 129
1641
+ 30 137
1642
+ 30 139
1643
+ 30 158
1644
+ 30 164
1645
+ 30 165
1646
+ 30 202
1647
+ 31 32
1648
+ 31 34
1649
+ 31 35
1650
+ 31 37
1651
+ 31 38
1652
+ 31 39
1653
+ 31 40
1654
+ 31 41
1655
+ 31 46
1656
+ 31 47
1657
+ 31 50
1658
+ 31 55
1659
+ 31 57
1660
+ 31 69
1661
+ 31 101
1662
+ 31 103
1663
+ 31 112
1664
+ 31 116
1665
+ 31 118
1666
+ 31 122
1667
+ 31 126
1668
+ 31 127
1669
+ 31 129
1670
+ 31 131
1671
+ 31 162
1672
+ 32 33
1673
+ 32 35
1674
+ 32 36
1675
+ 32 37
1676
+ 32 38
1677
+ 32 39
1678
+ 32 40
1679
+ 32 41
1680
+ 32 42
1681
+ 32 43
1682
+ 32 46
1683
+ 32 47
1684
+ 32 50
1685
+ 32 51
1686
+ 32 53
1687
+ 32 54
1688
+ 32 55
1689
+ 32 56
1690
+ 32 57
1691
+ 32 62
1692
+ 32 64
1693
+ 32 65
1694
+ 32 68
1695
+ 32 69
1696
+ 32 80
1697
+ 32 97
1698
+ 32 101
1699
+ 32 102
1700
+ 32 103
1701
+ 32 104
1702
+ 32 105
1703
+ 32 107
1704
+ 32 111
1705
+ 32 112
1706
+ 32 116
1707
+ 32 118
1708
+ 32 122
1709
+ 32 126
1710
+ 32 127
1711
+ 32 128
1712
+ 32 129
1713
+ 32 131
1714
+ 32 135
1715
+ 32 136
1716
+ 32 137
1717
+ 32 158
1718
+ 32 202
1719
+ 33 34
1720
+ 33 35
1721
+ 33 36
1722
+ 33 37
1723
+ 33 38
1724
+ 33 39
1725
+ 33 40
1726
+ 33 41
1727
+ 33 42
1728
+ 33 43
1729
+ 33 44
1730
+ 33 45
1731
+ 33 46
1732
+ 33 47
1733
+ 33 48
1734
+ 33 49
1735
+ 33 51
1736
+ 33 52
1737
+ 33 53
1738
+ 33 54
1739
+ 33 55
1740
+ 33 56
1741
+ 33 57
1742
+ 33 60
1743
+ 33 62
1744
+ 33 64
1745
+ 33 66
1746
+ 33 68
1747
+ 33 80
1748
+ 33 97
1749
+ 33 98
1750
+ 33 99
1751
+ 33 103
1752
+ 33 104
1753
+ 33 105
1754
+ 33 106
1755
+ 33 107
1756
+ 33 108
1757
+ 33 110
1758
+ 33 111
1759
+ 33 112
1760
+ 33 115
1761
+ 33 116
1762
+ 33 117
1763
+ 33 118
1764
+ 33 119
1765
+ 33 120
1766
+ 33 121
1767
+ 33 123
1768
+ 33 125
1769
+ 33 126
1770
+ 33 127
1771
+ 33 128
1772
+ 33 131
1773
+ 33 137
1774
+ 33 154
1775
+ 33 164
1776
+ 33 175
1777
+ 33 197
1778
+ 33 202
1779
+ 34 35
1780
+ 34 37
1781
+ 34 38
1782
+ 34 39
1783
+ 34 40
1784
+ 34 41
1785
+ 34 42
1786
+ 34 43
1787
+ 34 46
1788
+ 34 47
1789
+ 34 50
1790
+ 34 51
1791
+ 34 56
1792
+ 34 101
1793
+ 34 103
1794
+ 34 104
1795
+ 34 112
1796
+ 34 115
1797
+ 34 116
1798
+ 34 122
1799
+ 34 127
1800
+ 34 131
1801
+ 34 135
1802
+ 34 136
1803
+ 35 36
1804
+ 35 37
1805
+ 35 38
1806
+ 35 40
1807
+ 35 41
1808
+ 35 42
1809
+ 35 43
1810
+ 35 44
1811
+ 35 45
1812
+ 35 46
1813
+ 35 47
1814
+ 35 48
1815
+ 35 49
1816
+ 35 50
1817
+ 35 51
1818
+ 35 52
1819
+ 35 53
1820
+ 35 54
1821
+ 35 55
1822
+ 35 56
1823
+ 35 57
1824
+ 35 60
1825
+ 35 62
1826
+ 35 64
1827
+ 35 65
1828
+ 35 66
1829
+ 35 68
1830
+ 35 69
1831
+ 35 80
1832
+ 35 87
1833
+ 35 95
1834
+ 35 97
1835
+ 35 98
1836
+ 35 99
1837
+ 35 100
1838
+ 35 101
1839
+ 35 102
1840
+ 35 103
1841
+ 35 104
1842
+ 35 105
1843
+ 35 106
1844
+ 35 107
1845
+ 35 109
1846
+ 35 110
1847
+ 35 111
1848
+ 35 112
1849
+ 35 113
1850
+ 35 114
1851
+ 35 115
1852
+ 35 116
1853
+ 35 117
1854
+ 35 118
1855
+ 35 119
1856
+ 35 120
1857
+ 35 122
1858
+ 35 124
1859
+ 35 125
1860
+ 35 126
1861
+ 35 127
1862
+ 35 128
1863
+ 35 130
1864
+ 35 137
1865
+ 35 162
1866
+ 35 164
1867
+ 35 197
1868
+ 36 37
1869
+ 36 38
1870
+ 36 39
1871
+ 36 41
1872
+ 36 42
1873
+ 36 43
1874
+ 36 44
1875
+ 36 45
1876
+ 36 46
1877
+ 36 47
1878
+ 36 48
1879
+ 36 49
1880
+ 36 50
1881
+ 36 51
1882
+ 36 52
1883
+ 36 53
1884
+ 36 54
1885
+ 36 55
1886
+ 36 56
1887
+ 36 57
1888
+ 36 60
1889
+ 36 62
1890
+ 36 63
1891
+ 36 64
1892
+ 36 65
1893
+ 36 66
1894
+ 36 68
1895
+ 36 69
1896
+ 36 70
1897
+ 36 71
1898
+ 36 73
1899
+ 36 76
1900
+ 36 77
1901
+ 36 80
1902
+ 36 81
1903
+ 36 85
1904
+ 36 87
1905
+ 36 91
1906
+ 36 92
1907
+ 36 93
1908
+ 36 94
1909
+ 36 95
1910
+ 36 97
1911
+ 36 98
1912
+ 36 99
1913
+ 36 100
1914
+ 36 101
1915
+ 36 102
1916
+ 36 104
1917
+ 36 105
1918
+ 36 106
1919
+ 36 107
1920
+ 36 108
1921
+ 36 110
1922
+ 36 111
1923
+ 36 112
1924
+ 36 115
1925
+ 36 116
1926
+ 36 117
1927
+ 36 118
1928
+ 36 120
1929
+ 36 123
1930
+ 36 124
1931
+ 36 125
1932
+ 36 128
1933
+ 36 137
1934
+ 36 138
1935
+ 36 139
1936
+ 36 140
1937
+ 36 142
1938
+ 36 145
1939
+ 36 146
1940
+ 36 147
1941
+ 36 148
1942
+ 36 149
1943
+ 36 150
1944
+ 36 151
1945
+ 36 152
1946
+ 36 154
1947
+ 36 160
1948
+ 36 164
1949
+ 36 175
1950
+ 36 176
1951
+ 36 177
1952
+ 36 204
1953
+ 36 214
1954
+ 37 38
1955
+ 37 39
1956
+ 37 40
1957
+ 37 41
1958
+ 37 42
1959
+ 37 43
1960
+ 37 44
1961
+ 37 47
1962
+ 37 50
1963
+ 37 51
1964
+ 37 53
1965
+ 37 54
1966
+ 37 55
1967
+ 37 56
1968
+ 37 57
1969
+ 37 59
1970
+ 37 60
1971
+ 37 62
1972
+ 37 63
1973
+ 37 64
1974
+ 37 65
1975
+ 37 66
1976
+ 37 69
1977
+ 37 80
1978
+ 37 85
1979
+ 37 97
1980
+ 37 98
1981
+ 37 99
1982
+ 37 101
1983
+ 37 103
1984
+ 37 104
1985
+ 37 105
1986
+ 37 106
1987
+ 37 107
1988
+ 37 110
1989
+ 37 111
1990
+ 37 116
1991
+ 37 117
1992
+ 37 118
1993
+ 37 120
1994
+ 37 121
1995
+ 37 122
1996
+ 37 123
1997
+ 37 126
1998
+ 37 127
1999
+ 37 128
2000
+ 37 130
2001
+ 37 131
2002
+ 37 137
2003
+ 37 161
2004
+ 37 164
2005
+ 37 202
2006
+ 38 39
2007
+ 38 40
2008
+ 38 41
2009
+ 38 42
2010
+ 38 43
2011
+ 38 45
2012
+ 38 46
2013
+ 38 47
2014
+ 38 48
2015
+ 38 49
2016
+ 38 50
2017
+ 38 51
2018
+ 38 52
2019
+ 38 53
2020
+ 38 54
2021
+ 38 55
2022
+ 38 56
2023
+ 38 57
2024
+ 38 62
2025
+ 38 64
2026
+ 38 66
2027
+ 38 87
2028
+ 38 95
2029
+ 38 97
2030
+ 38 99
2031
+ 38 102
2032
+ 38 103
2033
+ 38 104
2034
+ 38 105
2035
+ 38 106
2036
+ 38 107
2037
+ 38 108
2038
+ 38 112
2039
+ 38 115
2040
+ 38 116
2041
+ 38 117
2042
+ 38 118
2043
+ 38 119
2044
+ 38 120
2045
+ 38 121
2046
+ 38 123
2047
+ 38 125
2048
+ 38 126
2049
+ 38 127
2050
+ 38 128
2051
+ 38 131
2052
+ 38 164
2053
+ 38 175
2054
+ 38 197
2055
+ 38 202
2056
+ 39 40
2057
+ 39 42
2058
+ 39 47
2059
+ 39 51
2060
+ 39 55
2061
+ 39 57
2062
+ 39 101
2063
+ 39 103
2064
+ 39 112
2065
+ 39 116
2066
+ 39 118
2067
+ 39 122
2068
+ 39 126
2069
+ 39 127
2070
+ 39 131
2071
+ 39 137
2072
+ 39 162
2073
+ 40 41
2074
+ 40 46
2075
+ 40 48
2076
+ 40 51
2077
+ 40 57
2078
+ 40 59
2079
+ 40 63
2080
+ 40 67
2081
+ 40 68
2082
+ 40 83
2083
+ 40 102
2084
+ 40 103
2085
+ 40 104
2086
+ 40 112
2087
+ 40 122
2088
+ 40 128
2089
+ 40 131
2090
+ 40 135
2091
+ 40 136
2092
+ 41 42
2093
+ 41 46
2094
+ 41 47
2095
+ 41 48
2096
+ 41 50
2097
+ 41 51
2098
+ 41 55
2099
+ 41 57
2100
+ 41 69
2101
+ 41 95
2102
+ 41 97
2103
+ 41 101
2104
+ 41 103
2105
+ 41 104
2106
+ 41 115
2107
+ 41 116
2108
+ 41 120
2109
+ 41 122
2110
+ 41 124
2111
+ 41 127
2112
+ 41 131
2113
+ 41 135
2114
+ 41 164
2115
+ 42 43
2116
+ 42 44
2117
+ 42 45
2118
+ 42 46
2119
+ 42 47
2120
+ 42 48
2121
+ 42 49
2122
+ 42 50
2123
+ 42 51
2124
+ 42 53
2125
+ 42 54
2126
+ 42 55
2127
+ 42 56
2128
+ 42 57
2129
+ 42 59
2130
+ 42 60
2131
+ 42 61
2132
+ 42 63
2133
+ 42 65
2134
+ 42 69
2135
+ 42 82
2136
+ 42 87
2137
+ 42 89
2138
+ 42 91
2139
+ 42 97
2140
+ 42 98
2141
+ 42 99
2142
+ 42 100
2143
+ 42 101
2144
+ 42 102
2145
+ 42 103
2146
+ 42 104
2147
+ 42 105
2148
+ 42 106
2149
+ 42 107
2150
+ 42 108
2151
+ 42 109
2152
+ 42 110
2153
+ 42 111
2154
+ 42 112
2155
+ 42 115
2156
+ 42 116
2157
+ 42 117
2158
+ 42 118
2159
+ 42 119
2160
+ 42 120
2161
+ 42 121
2162
+ 42 122
2163
+ 42 123
2164
+ 42 126
2165
+ 42 128
2166
+ 42 137
2167
+ 42 154
2168
+ 42 156
2169
+ 42 157
2170
+ 42 160
2171
+ 42 164
2172
+ 42 175
2173
+ 42 196
2174
+ 42 197
2175
+ 43 44
2176
+ 43 45
2177
+ 43 46
2178
+ 43 47
2179
+ 43 48
2180
+ 43 49
2181
+ 43 50
2182
+ 43 51
2183
+ 43 53
2184
+ 43 54
2185
+ 43 56
2186
+ 43 62
2187
+ 43 64
2188
+ 43 66
2189
+ 43 68
2190
+ 43 69
2191
+ 43 80
2192
+ 43 95
2193
+ 43 97
2194
+ 43 98
2195
+ 43 99
2196
+ 43 100
2197
+ 43 102
2198
+ 43 105
2199
+ 43 106
2200
+ 43 107
2201
+ 43 108
2202
+ 43 109
2203
+ 43 110
2204
+ 43 111
2205
+ 43 112
2206
+ 43 113
2207
+ 43 114
2208
+ 43 115
2209
+ 43 117
2210
+ 43 118
2211
+ 43 119
2212
+ 43 120
2213
+ 43 122
2214
+ 43 123
2215
+ 43 124
2216
+ 43 126
2217
+ 43 128
2218
+ 43 137
2219
+ 43 139
2220
+ 43 144
2221
+ 43 164
2222
+ 43 202
2223
+ 44 45
2224
+ 44 46
2225
+ 44 47
2226
+ 44 48
2227
+ 44 49
2228
+ 44 55
2229
+ 44 57
2230
+ 44 68
2231
+ 44 80
2232
+ 44 92
2233
+ 44 97
2234
+ 44 99
2235
+ 44 102
2236
+ 44 104
2237
+ 44 105
2238
+ 44 107
2239
+ 44 108
2240
+ 44 109
2241
+ 44 110
2242
+ 44 111
2243
+ 44 112
2244
+ 44 117
2245
+ 44 120
2246
+ 44 123
2247
+ 44 124
2248
+ 44 128
2249
+ 44 137
2250
+ 44 139
2251
+ 44 150
2252
+ 44 151
2253
+ 44 154
2254
+ 44 175
2255
+ 45 46
2256
+ 45 47
2257
+ 45 48
2258
+ 45 49
2259
+ 45 53
2260
+ 45 54
2261
+ 45 55
2262
+ 45 57
2263
+ 45 60
2264
+ 45 64
2265
+ 45 68
2266
+ 45 80
2267
+ 45 92
2268
+ 45 97
2269
+ 45 99
2270
+ 45 104
2271
+ 45 105
2272
+ 45 106
2273
+ 45 107
2274
+ 45 109
2275
+ 45 111
2276
+ 45 117
2277
+ 45 119
2278
+ 45 120
2279
+ 45 123
2280
+ 45 124
2281
+ 45 128
2282
+ 45 139
2283
+ 45 150
2284
+ 45 151
2285
+ 45 152
2286
+ 45 175
2287
+ 45 176
2288
+ 45 204
2289
+ 46 47
2290
+ 46 48
2291
+ 46 49
2292
+ 46 51
2293
+ 46 53
2294
+ 46 54
2295
+ 46 55
2296
+ 46 56
2297
+ 46 57
2298
+ 46 60
2299
+ 46 64
2300
+ 46 66
2301
+ 46 68
2302
+ 46 80
2303
+ 46 95
2304
+ 46 97
2305
+ 46 98
2306
+ 46 99
2307
+ 46 100
2308
+ 46 103
2309
+ 46 104
2310
+ 46 105
2311
+ 46 106
2312
+ 46 107
2313
+ 46 108
2314
+ 46 109
2315
+ 46 110
2316
+ 46 111
2317
+ 46 112
2318
+ 46 114
2319
+ 46 115
2320
+ 46 117
2321
+ 46 118
2322
+ 46 119
2323
+ 46 120
2324
+ 46 122
2325
+ 46 123
2326
+ 46 124
2327
+ 46 128
2328
+ 46 131
2329
+ 46 137
2330
+ 46 154
2331
+ 46 175
2332
+ 47 48
2333
+ 47 49
2334
+ 47 50
2335
+ 47 52
2336
+ 47 53
2337
+ 47 54
2338
+ 47 55
2339
+ 47 56
2340
+ 47 57
2341
+ 47 60
2342
+ 47 62
2343
+ 47 64
2344
+ 47 66
2345
+ 47 68
2346
+ 47 69
2347
+ 47 80
2348
+ 47 95
2349
+ 47 97
2350
+ 47 98
2351
+ 47 99
2352
+ 47 100
2353
+ 47 103
2354
+ 47 104
2355
+ 47 105
2356
+ 47 106
2357
+ 47 107
2358
+ 47 108
2359
+ 47 110
2360
+ 47 111
2361
+ 47 112
2362
+ 47 114
2363
+ 47 115
2364
+ 47 117
2365
+ 47 118
2366
+ 47 120
2367
+ 47 121
2368
+ 47 123
2369
+ 47 124
2370
+ 47 127
2371
+ 47 128
2372
+ 47 130
2373
+ 47 137
2374
+ 47 139
2375
+ 47 145
2376
+ 47 154
2377
+ 47 160
2378
+ 47 164
2379
+ 47 165
2380
+ 47 175
2381
+ 47 183
2382
+ 47 197
2383
+ 47 202
2384
+ 47 204
2385
+ 47 214
2386
+ 48 49
2387
+ 48 50
2388
+ 48 55
2389
+ 48 56
2390
+ 48 67
2391
+ 48 68
2392
+ 48 95
2393
+ 48 97
2394
+ 48 98
2395
+ 48 99
2396
+ 48 100
2397
+ 48 102
2398
+ 48 104
2399
+ 48 105
2400
+ 48 106
2401
+ 48 107
2402
+ 48 108
2403
+ 48 112
2404
+ 48 114
2405
+ 48 115
2406
+ 48 118
2407
+ 48 119
2408
+ 48 120
2409
+ 48 123
2410
+ 48 124
2411
+ 48 128
2412
+ 48 164
2413
+ 48 175
2414
+ 49 50
2415
+ 49 53
2416
+ 49 54
2417
+ 49 55
2418
+ 49 56
2419
+ 49 57
2420
+ 49 60
2421
+ 49 62
2422
+ 49 64
2423
+ 49 95
2424
+ 49 97
2425
+ 49 98
2426
+ 49 99
2427
+ 49 100
2428
+ 49 102
2429
+ 49 104
2430
+ 49 105
2431
+ 49 106
2432
+ 49 107
2433
+ 49 108
2434
+ 49 110
2435
+ 49 112
2436
+ 49 114
2437
+ 49 115
2438
+ 49 117
2439
+ 49 119
2440
+ 49 123
2441
+ 49 128
2442
+ 49 164
2443
+ 49 175
2444
+ 49 202
2445
+ 50 54
2446
+ 50 56
2447
+ 50 64
2448
+ 50 97
2449
+ 50 98
2450
+ 50 100
2451
+ 50 102
2452
+ 50 106
2453
+ 50 107
2454
+ 50 108
2455
+ 50 115
2456
+ 50 117
2457
+ 50 119
2458
+ 50 121
2459
+ 50 122
2460
+ 50 128
2461
+ 50 197
2462
+ 51 52
2463
+ 51 54
2464
+ 51 56
2465
+ 51 64
2466
+ 51 97
2467
+ 51 98
2468
+ 51 115
2469
+ 51 126
2470
+ 51 128
2471
+ 51 162
2472
+ 51 165
2473
+ 52 53
2474
+ 52 106
2475
+ 53 54
2476
+ 53 55
2477
+ 53 56
2478
+ 53 57
2479
+ 53 60
2480
+ 53 64
2481
+ 53 68
2482
+ 53 87
2483
+ 53 95
2484
+ 53 97
2485
+ 53 99
2486
+ 53 104
2487
+ 53 105
2488
+ 53 106
2489
+ 53 107
2490
+ 53 108
2491
+ 53 110
2492
+ 53 117
2493
+ 53 119
2494
+ 53 120
2495
+ 53 123
2496
+ 53 128
2497
+ 53 137
2498
+ 53 154
2499
+ 53 175
2500
+ 53 197
2501
+ 53 202
2502
+ 54 55
2503
+ 54 57
2504
+ 54 64
2505
+ 54 99
2506
+ 54 100
2507
+ 54 102
2508
+ 54 106
2509
+ 54 107
2510
+ 54 108
2511
+ 54 109
2512
+ 54 110
2513
+ 54 117
2514
+ 54 119
2515
+ 54 120
2516
+ 54 123
2517
+ 54 128
2518
+ 54 175
2519
+ 54 202
2520
+ 55 56
2521
+ 55 60
2522
+ 55 62
2523
+ 55 64
2524
+ 55 66
2525
+ 55 68
2526
+ 55 95
2527
+ 55 96
2528
+ 55 97
2529
+ 55 99
2530
+ 55 104
2531
+ 55 106
2532
+ 55 107
2533
+ 55 108
2534
+ 55 110
2535
+ 55 112
2536
+ 55 115
2537
+ 55 117
2538
+ 55 118
2539
+ 55 120
2540
+ 55 123
2541
+ 55 124
2542
+ 55 128
2543
+ 55 131
2544
+ 55 137
2545
+ 55 154
2546
+ 55 164
2547
+ 55 165
2548
+ 55 175
2549
+ 55 214
2550
+ 56 57
2551
+ 56 60
2552
+ 56 62
2553
+ 56 64
2554
+ 56 66
2555
+ 56 68
2556
+ 56 97
2557
+ 56 98
2558
+ 56 99
2559
+ 56 100
2560
+ 56 105
2561
+ 56 106
2562
+ 56 107
2563
+ 56 108
2564
+ 56 110
2565
+ 56 117
2566
+ 56 119
2567
+ 56 120
2568
+ 56 123
2569
+ 56 137
2570
+ 56 175
2571
+ 56 202
2572
+ 57 60
2573
+ 57 64
2574
+ 57 95
2575
+ 57 96
2576
+ 57 97
2577
+ 57 99
2578
+ 57 104
2579
+ 57 106
2580
+ 57 107
2581
+ 57 110
2582
+ 57 112
2583
+ 57 117
2584
+ 57 118
2585
+ 57 120
2586
+ 57 123
2587
+ 57 128
2588
+ 57 131
2589
+ 57 137
2590
+ 57 154
2591
+ 57 175
2592
+ 59 62
2593
+ 59 67
2594
+ 59 68
2595
+ 59 82
2596
+ 59 83
2597
+ 59 87
2598
+ 59 88
2599
+ 59 102
2600
+ 59 125
2601
+ 59 129
2602
+ 59 158
2603
+ 59 159
2604
+ 59 160
2605
+ 59 161
2606
+ 59 162
2607
+ 59 163
2608
+ 60 61
2609
+ 60 64
2610
+ 60 66
2611
+ 60 97
2612
+ 60 99
2613
+ 60 104
2614
+ 60 106
2615
+ 60 107
2616
+ 60 110
2617
+ 60 115
2618
+ 60 117
2619
+ 60 120
2620
+ 60 123
2621
+ 60 128
2622
+ 60 154
2623
+ 60 155
2624
+ 60 156
2625
+ 60 157
2626
+ 60 175
2627
+ 60 177
2628
+ 60 196
2629
+ 61 62
2630
+ 61 64
2631
+ 61 66
2632
+ 61 155
2633
+ 61 156
2634
+ 61 157
2635
+ 61 196
2636
+ 62 63
2637
+ 62 65
2638
+ 62 69
2639
+ 62 97
2640
+ 62 101
2641
+ 62 108
2642
+ 62 110
2643
+ 63 66
2644
+ 63 67
2645
+ 63 68
2646
+ 63 82
2647
+ 63 83
2648
+ 63 84
2649
+ 63 87
2650
+ 63 88
2651
+ 63 97
2652
+ 63 104
2653
+ 63 111
2654
+ 63 122
2655
+ 63 125
2656
+ 63 129
2657
+ 63 145
2658
+ 63 158
2659
+ 63 160
2660
+ 63 161
2661
+ 63 162
2662
+ 63 163
2663
+ 64 65
2664
+ 64 69
2665
+ 64 80
2666
+ 64 97
2667
+ 64 98
2668
+ 64 101
2669
+ 64 104
2670
+ 64 106
2671
+ 64 107
2672
+ 64 108
2673
+ 64 110
2674
+ 64 115
2675
+ 64 116
2676
+ 64 117
2677
+ 64 119
2678
+ 64 120
2679
+ 64 123
2680
+ 64 128
2681
+ 64 164
2682
+ 64 175
2683
+ 64 202
2684
+ 65 66
2685
+ 65 67
2686
+ 65 80
2687
+ 65 84
2688
+ 65 87
2689
+ 65 88
2690
+ 65 97
2691
+ 65 100
2692
+ 65 104
2693
+ 65 120
2694
+ 65 122
2695
+ 65 125
2696
+ 65 129
2697
+ 65 160
2698
+ 65 161
2699
+ 65 162
2700
+ 66 69
2701
+ 66 97
2702
+ 66 104
2703
+ 66 107
2704
+ 66 108
2705
+ 66 120
2706
+ 66 128
2707
+ 66 155
2708
+ 66 156
2709
+ 66 157
2710
+ 66 196
2711
+ 67 68
2712
+ 67 69
2713
+ 67 84
2714
+ 67 85
2715
+ 67 99
2716
+ 67 104
2717
+ 67 115
2718
+ 67 131
2719
+ 67 161
2720
+ 67 172
2721
+ 67 195
2722
+ 68 82
2723
+ 68 83
2724
+ 68 85
2725
+ 68 97
2726
+ 68 102
2727
+ 68 105
2728
+ 68 107
2729
+ 68 128
2730
+ 68 129
2731
+ 68 130
2732
+ 68 143
2733
+ 68 163
2734
+ 69 80
2735
+ 69 97
2736
+ 69 100
2737
+ 69 104
2738
+ 69 111
2739
+ 69 114
2740
+ 69 122
2741
+ 69 125
2742
+ 69 145
2743
+ 69 160
2744
+ 69 161
2745
+ 69 162
2746
+ 70 71
2747
+ 70 72
2748
+ 70 73
2749
+ 70 74
2750
+ 70 76
2751
+ 70 77
2752
+ 70 79
2753
+ 70 90
2754
+ 70 91
2755
+ 70 133
2756
+ 70 138
2757
+ 70 140
2758
+ 70 142
2759
+ 70 146
2760
+ 70 147
2761
+ 70 148
2762
+ 70 150
2763
+ 70 153
2764
+ 70 176
2765
+ 71 73
2766
+ 71 74
2767
+ 71 75
2768
+ 71 76
2769
+ 71 77
2770
+ 71 90
2771
+ 71 91
2772
+ 71 92
2773
+ 71 93
2774
+ 71 94
2775
+ 71 120
2776
+ 71 132
2777
+ 71 133
2778
+ 71 138
2779
+ 71 140
2780
+ 71 142
2781
+ 71 146
2782
+ 71 147
2783
+ 71 148
2784
+ 71 149
2785
+ 71 150
2786
+ 71 151
2787
+ 71 152
2788
+ 71 153
2789
+ 71 176
2790
+ 72 73
2791
+ 72 74
2792
+ 72 92
2793
+ 72 93
2794
+ 72 94
2795
+ 72 133
2796
+ 72 138
2797
+ 72 140
2798
+ 72 142
2799
+ 72 147
2800
+ 72 148
2801
+ 72 151
2802
+ 72 152
2803
+ 72 153
2804
+ 72 176
2805
+ 73 77
2806
+ 73 79
2807
+ 73 132
2808
+ 73 134
2809
+ 73 140
2810
+ 74 77
2811
+ 74 79
2812
+ 74 132
2813
+ 74 134
2814
+ 74 140
2815
+ 75 133
2816
+ 75 134
2817
+ 75 140
2818
+ 75 147
2819
+ 76 77
2820
+ 76 93
2821
+ 76 94
2822
+ 76 132
2823
+ 76 133
2824
+ 76 134
2825
+ 76 140
2826
+ 76 147
2827
+ 76 150
2828
+ 76 152
2829
+ 76 153
2830
+ 76 176
2831
+ 77 132
2832
+ 77 133
2833
+ 77 134
2834
+ 77 138
2835
+ 77 140
2836
+ 77 142
2837
+ 77 146
2838
+ 77 147
2839
+ 77 148
2840
+ 77 150
2841
+ 77 151
2842
+ 77 153
2843
+ 78 91
2844
+ 78 93
2845
+ 78 94
2846
+ 78 132
2847
+ 78 133
2848
+ 78 146
2849
+ 78 147
2850
+ 78 148
2851
+ 78 149
2852
+ 79 92
2853
+ 79 93
2854
+ 79 132
2855
+ 79 133
2856
+ 79 138
2857
+ 79 140
2858
+ 79 142
2859
+ 79 147
2860
+ 79 148
2861
+ 79 149
2862
+ 79 150
2863
+ 79 151
2864
+ 79 153
2865
+ 79 176
2866
+ 80 92
2867
+ 80 97
2868
+ 80 98
2869
+ 80 100
2870
+ 80 103
2871
+ 80 105
2872
+ 80 107
2873
+ 80 108
2874
+ 80 110
2875
+ 80 115
2876
+ 80 117
2877
+ 80 120
2878
+ 80 122
2879
+ 80 124
2880
+ 80 128
2881
+ 80 151
2882
+ 80 162
2883
+ 80 176
2884
+ 81 83
2885
+ 81 84
2886
+ 81 88
2887
+ 81 183
2888
+ 81 184
2889
+ 81 185
2890
+ 82 83
2891
+ 82 87
2892
+ 82 88
2893
+ 82 89
2894
+ 82 102
2895
+ 82 143
2896
+ 82 183
2897
+ 83 85
2898
+ 83 86
2899
+ 83 88
2900
+ 83 89
2901
+ 83 102
2902
+ 83 143
2903
+ 84 86
2904
+ 84 87
2905
+ 84 88
2906
+ 84 211
2907
+ 85 88
2908
+ 85 129
2909
+ 85 158
2910
+ 85 159
2911
+ 85 161
2912
+ 85 162
2913
+ 86 88
2914
+ 87 88
2915
+ 87 102
2916
+ 87 107
2917
+ 87 118
2918
+ 87 202
2919
+ 88 183
2920
+ 88 185
2921
+ 88 215
2922
+ 89 184
2923
+ 90 93
2924
+ 90 140
2925
+ 90 147
2926
+ 90 149
2927
+ 90 152
2928
+ 91 132
2929
+ 92 120
2930
+ 92 132
2931
+ 92 134
2932
+ 93 132
2933
+ 93 134
2934
+ 94 110
2935
+ 95 106
2936
+ 95 108
2937
+ 95 112
2938
+ 95 127
2939
+ 95 128
2940
+ 96 128
2941
+ 97 98
2942
+ 97 99
2943
+ 97 100
2944
+ 97 102
2945
+ 97 104
2946
+ 97 105
2947
+ 97 106
2948
+ 97 107
2949
+ 97 108
2950
+ 97 110
2951
+ 97 111
2952
+ 97 112
2953
+ 97 114
2954
+ 97 115
2955
+ 97 117
2956
+ 97 122
2957
+ 97 124
2958
+ 97 125
2959
+ 97 128
2960
+ 97 137
2961
+ 97 145
2962
+ 97 177
2963
+ 98 104
2964
+ 98 106
2965
+ 98 107
2966
+ 98 108
2967
+ 98 115
2968
+ 98 116
2969
+ 98 118
2970
+ 98 128
2971
+ 98 204
2972
+ 99 104
2973
+ 99 106
2974
+ 99 107
2975
+ 99 108
2976
+ 99 110
2977
+ 99 117
2978
+ 99 120
2979
+ 99 123
2980
+ 99 131
2981
+ 99 154
2982
+ 99 175
2983
+ 100 108
2984
+ 100 109
2985
+ 100 111
2986
+ 100 112
2987
+ 100 113
2988
+ 100 114
2989
+ 100 122
2990
+ 100 128
2991
+ 100 144
2992
+ 100 145
2993
+ 101 118
2994
+ 101 126
2995
+ 101 165
2996
+ 101 197
2997
+ 102 105
2998
+ 102 107
2999
+ 102 108
3000
+ 102 112
3001
+ 102 117
3002
+ 102 143
3003
+ 102 183
3004
+ 103 111
3005
+ 103 112
3006
+ 103 115
3007
+ 103 118
3008
+ 103 121
3009
+ 103 122
3010
+ 103 126
3011
+ 103 128
3012
+ 103 129
3013
+ 103 131
3014
+ 103 165
3015
+ 103 197
3016
+ 104 106
3017
+ 104 107
3018
+ 104 110
3019
+ 104 115
3020
+ 104 117
3021
+ 104 120
3022
+ 104 123
3023
+ 104 124
3024
+ 104 125
3025
+ 104 137
3026
+ 104 154
3027
+ 104 164
3028
+ 104 177
3029
+ 105 107
3030
+ 105 108
3031
+ 105 111
3032
+ 105 112
3033
+ 105 114
3034
+ 105 117
3035
+ 105 119
3036
+ 105 128
3037
+ 106 107
3038
+ 106 108
3039
+ 106 110
3040
+ 106 119
3041
+ 106 120
3042
+ 106 123
3043
+ 106 124
3044
+ 106 128
3045
+ 106 154
3046
+ 106 175
3047
+ 106 204
3048
+ 107 108
3049
+ 107 109
3050
+ 107 110
3051
+ 107 112
3052
+ 107 115
3053
+ 107 117
3054
+ 107 119
3055
+ 107 120
3056
+ 107 123
3057
+ 107 124
3058
+ 107 128
3059
+ 107 137
3060
+ 107 164
3061
+ 107 175
3062
+ 107 202
3063
+ 107 204
3064
+ 108 112
3065
+ 108 114
3066
+ 108 115
3067
+ 108 119
3068
+ 108 120
3069
+ 108 122
3070
+ 108 128
3071
+ 109 114
3072
+ 109 128
3073
+ 110 111
3074
+ 110 117
3075
+ 110 119
3076
+ 110 120
3077
+ 110 123
3078
+ 110 124
3079
+ 110 128
3080
+ 111 115
3081
+ 111 117
3082
+ 111 120
3083
+ 111 122
3084
+ 111 124
3085
+ 111 128
3086
+ 111 162
3087
+ 111 176
3088
+ 112 114
3089
+ 112 118
3090
+ 112 122
3091
+ 112 126
3092
+ 112 128
3093
+ 112 129
3094
+ 113 114
3095
+ 113 128
3096
+ 114 117
3097
+ 114 122
3098
+ 114 126
3099
+ 114 128
3100
+ 115 116
3101
+ 115 117
3102
+ 115 118
3103
+ 115 120
3104
+ 115 124
3105
+ 115 125
3106
+ 115 128
3107
+ 115 162
3108
+ 115 177
3109
+ 116 165
3110
+ 117 118
3111
+ 117 120
3112
+ 117 123
3113
+ 117 127
3114
+ 117 128
3115
+ 117 164
3116
+ 118 126
3117
+ 118 127
3118
+ 118 128
3119
+ 118 131
3120
+ 119 123
3121
+ 119 128
3122
+ 119 175
3123
+ 120 122
3124
+ 120 123
3125
+ 120 128
3126
+ 120 137
3127
+ 120 139
3128
+ 120 150
3129
+ 120 154
3130
+ 120 164
3131
+ 120 175
3132
+ 120 176
3133
+ 121 122
3134
+ 121 126
3135
+ 121 128
3136
+ 122 126
3137
+ 122 128
3138
+ 122 162
3139
+ 123 137
3140
+ 123 154
3141
+ 123 175
3142
+ 124 137
3143
+ 126 129
3144
+ 126 131
3145
+ 126 162
3146
+ 126 167
3147
+ 126 168
3148
+ 126 170
3149
+ 126 171
3150
+ 126 172
3151
+ 127 165
3152
+ 127 197
3153
+ 128 202
3154
+ 129 168
3155
+ 129 170
3156
+ 130 145
3157
+ 130 159
3158
+ 130 160
3159
+ 130 162
3160
+ 132 133
3161
+ 132 138
3162
+ 132 140
3163
+ 132 142
3164
+ 132 146
3165
+ 132 147
3166
+ 132 148
3167
+ 132 149
3168
+ 132 150
3169
+ 132 151
3170
+ 132 152
3171
+ 132 153
3172
+ 133 134
3173
+ 133 140
3174
+ 133 153
3175
+ 134 138
3176
+ 134 140
3177
+ 134 142
3178
+ 134 148
3179
+ 134 150
3180
+ 134 153
3181
+ 134 176
3182
+ 138 140
3183
+ 138 147
3184
+ 138 150
3185
+ 141 181
3186
+ 141 206
3187
+ 142 153
3188
+ 145 160
3189
+ 145 210
3190
+ 146 153
3191
+ 154 175
3192
+ 155 156
3193
+ 155 157
3194
+ 156 157
3195
+ 158 166
3196
+ 158 167
3197
+ 158 168
3198
+ 158 169
3199
+ 158 170
3200
+ 158 171
3201
+ 158 172
3202
+ 163 208
3203
+ 164 175
3204
+ 166 172
3205
+ 167 168
3206
+ 168 170
3207
+ 168 172
3208
+ 173 174
3209
+ 174 179
3210
+ 174 182
3211
+ 174 203
3212
+ 174 213
3213
+ 178 179
3214
+ 178 180
3215
+ 178 181
3216
+ 179 180
3217
+ 179 203
3218
+ 179 205
3219
+ 179 206
3220
+ 180 181
3221
+ 180 203
3222
+ 180 213
3223
+ 181 207
3224
+ 181 213
3225
+ 182 205
3226
+ 182 206
3227
+ 183 208
3228
+ 186 187
3229
+ 186 191
3230
+ 186 193
3231
+ 186 194
3232
+ 186 199
3233
+ 187 190
3234
+ 187 191
3235
+ 187 193
3236
+ 187 201
3237
+ 188 189
3238
+ 188 193
3239
+ 188 194
3240
+ 188 199
3241
+ 188 200
3242
+ 188 212
3243
+ 189 190
3244
+ 189 193
3245
+ 189 201
3246
+ 190 193
3247
+ 190 194
3248
+ 190 199
3249
+ 190 200
3250
+ 191 194
3251
+ 191 198
3252
+ 191 199
3253
+ 191 200
3254
+ 192 194
3255
+ 193 194
3256
+ 193 198
3257
+ 193 199
3258
+ 193 212
3259
+ 193 216
3260
+ 194 200
3261
+ 194 212
3262
+ 198 200
3263
+ 198 201
3264
+ 199 200
3265
+ 200 201
3266
+ 201 216
3267
+ 205 206
3268
+ 205 207
3269
+ 208 209
analysis/rdkit_functions.py ADDED
@@ -0,0 +1,334 @@
1
+ import numpy as np
2
+ import torch
3
+ import re
4
+ import wandb
5
+ try:
6
+ from rdkit import Chem
7
+ print("Found rdkit, all good")
8
+ except ModuleNotFoundError as e:
9
+ use_rdkit = False
10
+ from warnings import warn
11
+ warn("Didn't find rdkit, this will fail")
12
+ assert use_rdkit, "Didn't find rdkit"
13
+
14
+
15
+ allowed_bonds = {'H': 1, 'C': 4, 'N': 3, 'O': 2, 'F': 1, 'B': 3, 'Al': 3, 'Si': 4, 'P': [3, 5],
16
+ 'S': 4, 'Cl': 1, 'As': 3, 'Br': 1, 'I': 1, 'Hg': [1, 2], 'Bi': [3, 5], 'Se': [2, 4, 6]}
17
+ bond_dict = [None, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE,
18
+ Chem.rdchem.BondType.AROMATIC]
19
+ ATOM_VALENCY = {6: 4, 7: 3, 8: 2, 9: 1, 15: 3, 16: 2, 17: 1, 35: 1, 53: 1}
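+ # bond_dict maps the integer edge types used throughout this file (0 = none, 1 = single, 2 = double,
+ # 3 = triple, 4 = aromatic) to RDKit bond types; allowed_bonds is consumed by check_stability below,
+ # and ATOM_VALENCY (keyed by atomic number) is used when adding formal charges to over-valent N/O/S atoms.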
20
+
21
+
22
+ class BasicMolecularMetrics(object):
23
+ def __init__(self, dataset_info, train_smiles=None):
24
+ self.atom_decoder = dataset_info.atom_decoder
25
+ self.dataset_info = dataset_info
26
+
27
+ # Retrieve dataset smiles only for qm9 currently.
28
+ self.dataset_smiles_list = train_smiles
29
+
30
+ def compute_validity(self, generated):
31
+ """ generated: list of couples (positions, atom_types)"""
32
+ valid = []
33
+ num_components = []
34
+ all_smiles = []
35
+ for graph in generated:
36
+ atom_types, edge_types = graph
37
+ mol = build_molecule(atom_types, edge_types, self.dataset_info.atom_decoder)
38
+ smiles = mol2smiles(mol)
39
+ try:
40
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
41
+ num_components.append(len(mol_frags))
42
+ except:
43
+ pass
44
+ if smiles is not None:
45
+ try:
46
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
47
+ largest_mol = max(mol_frags, default=mol, key=lambda m: m.GetNumAtoms())
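+ # Only the largest connected fragment is kept, so a disconnected but otherwise valid sample
+ # still contributes a valid SMILES.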
48
+ smiles = mol2smiles(largest_mol)
49
+ valid.append(smiles)
50
+ all_smiles.append(smiles)
51
+ except Chem.rdchem.AtomValenceException:
52
+ print("Valence error in GetMolFrags")
53
+ all_smiles.append(None)
54
+ except Chem.rdchem.KekulizeException:
55
+ print("Can't kekulize molecule")
56
+ all_smiles.append(None)
57
+ else:
58
+ all_smiles.append(None)
59
+
60
+ return valid, len(valid) / len(generated), np.array(num_components), all_smiles
61
+
62
+ def compute_uniqueness(self, valid):
63
+ """ valid: list of SMILES strings."""
64
+ return list(set(valid)), len(set(valid)) / len(valid)
65
+
66
+ def compute_novelty(self, unique):
67
+ num_novel = 0
68
+ novel = []
69
+ if self.dataset_smiles_list is None:
70
+ print("Dataset smiles is None, novelty computation skipped")
71
+ return 1, 1
72
+ for smiles in unique:
73
+ if smiles not in self.dataset_smiles_list:
74
+ novel.append(smiles)
75
+ num_novel += 1
76
+ return novel, num_novel / len(unique)
77
+
78
+ def compute_relaxed_validity(self, generated):
79
+ valid = []
80
+ for graph in generated:
81
+ atom_types, edge_types = graph
82
+ mol = build_molecule_with_partial_charges(atom_types, edge_types, self.dataset_info.atom_decoder)
83
+ smiles = mol2smiles(mol)
84
+ if smiles is not None:
85
+ try:
86
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
87
+ largest_mol = max(mol_frags, default=mol, key=lambda m: m.GetNumAtoms())
88
+ smiles = mol2smiles(largest_mol)
89
+ valid.append(smiles)
90
+ except Chem.rdchem.AtomValenceException:
91
+ print("Valence error in GetMolFrags")
92
+ except Chem.rdchem.KekulizeException:
93
+ print("Can't kekulize molecule")
94
+ return valid, len(valid) / len(generated)
95
+
96
+ def evaluate(self, generated):
97
+ """ generated: list of pairs (positions: n x 3, atom_types: n [int])
98
+ the positions and atom types should already be masked. """
99
+ valid, validity, num_components, all_smiles = self.compute_validity(generated)
100
+ nc_mu = num_components.mean() if len(num_components) > 0 else 0
101
+ nc_min = num_components.min() if len(num_components) > 0 else 0
102
+ nc_max = num_components.max() if len(num_components) > 0 else 0
103
+ print(f"Validity over {len(generated)} molecules: {validity * 100 :.2f}%")
104
+ print(f"Number of connected components of {len(generated)} molecules: min:{nc_min:.2f} mean:{nc_mu:.2f} max:{nc_max:.2f}")
105
+
106
+ relaxed_valid, relaxed_validity = self.compute_relaxed_validity(generated)
107
+ print(f"Relaxed validity over {len(generated)} molecules: {relaxed_validity * 100 :.2f}%")
108
+ if relaxed_validity > 0:
109
+ unique, uniqueness = self.compute_uniqueness(relaxed_valid)
110
+ print(f"Uniqueness over {len(relaxed_valid)} valid molecules: {uniqueness * 100 :.2f}%")
111
+
112
+ if self.dataset_smiles_list is not None:
113
+ _, novelty = self.compute_novelty(unique)
114
+ print(f"Novelty over {len(unique)} unique valid molecules: {novelty * 100 :.2f}%")
115
+ else:
116
+ novelty = -1.0
117
+ else:
118
+ novelty = -1.0
119
+ uniqueness = 0.0
120
+ unique = []
121
+ return ([validity, relaxed_validity, uniqueness, novelty], unique,
122
+ dict(nc_min=nc_min, nc_max=nc_max, nc_mu=nc_mu), all_smiles)
123
+
124
+
125
+ def mol2smiles(mol):
126
+ try:
127
+ Chem.SanitizeMol(mol)
128
+ except ValueError:
129
+ return None
130
+ return Chem.MolToSmiles(mol)
131
+
132
+
133
+ def build_molecule(atom_types, edge_types, atom_decoder, verbose=False):
134
+ if verbose:
135
+ print("building new molecule")
136
+
137
+ mol = Chem.RWMol()
138
+ for atom in atom_types:
139
+ a = Chem.Atom(atom_decoder[atom.item()])
140
+ mol.AddAtom(a)
141
+ if verbose:
142
+ print("Atom added: ", atom.item(), atom_decoder[atom.item()])
143
+
144
+ edge_types = torch.triu(edge_types)
145
+ all_bonds = torch.nonzero(edge_types)
146
+ for i, bond in enumerate(all_bonds):
147
+ if bond[0].item() != bond[1].item():
148
+ mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()])
149
+ if verbose:
150
+ print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(),
151
+ bond_dict[edge_types[bond[0], bond[1]].item()] )
152
+ return mol
153
+
154
+
155
+ def build_molecule_with_partial_charges(atom_types, edge_types, atom_decoder, verbose=False):
156
+ if verbose:
157
+ print("\nbuilding new molecule")
158
+
159
+ mol = Chem.RWMol()
160
+ for atom in atom_types:
161
+ a = Chem.Atom(atom_decoder[atom.item()])
162
+ mol.AddAtom(a)
163
+ if verbose:
164
+ print("Atom added: ", atom.item(), atom_decoder[atom.item()])
165
+ edge_types = torch.triu(edge_types)
166
+ all_bonds = torch.nonzero(edge_types)
167
+
168
+ for i, bond in enumerate(all_bonds):
169
+ if bond[0].item() != bond[1].item():
170
+ mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()])
171
+ if verbose:
172
+ print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(),
173
+ bond_dict[edge_types[bond[0], bond[1]].item()])
174
+ # add formal charge to atom: e.g. [O+], [N+], [S+]
175
+ # not support [O-], [N-], [S-], [NH+] etc.
176
+ flag, atomid_valence = check_valency(mol)
177
+ if verbose:
178
+ print("flag, valence", flag, atomid_valence)
179
+ if flag:
180
+ continue
181
+ else:
182
+ assert len(atomid_valence) == 2
183
+ idx = atomid_valence[0]
184
+ v = atomid_valence[1]
185
+ an = mol.GetAtomWithIdx(idx).GetAtomicNum()
186
+ if verbose:
187
+ print("atomic num of atom with a large valence", an)
188
+ if an in (7, 8, 16) and (v - ATOM_VALENCY[an]) == 1:
189
+ mol.GetAtomWithIdx(idx).SetFormalCharge(1)
190
+ # print("Formal charge added")
191
+ return mol
192
+
193
+
194
+ # Functions from GDSS
195
+ def check_valency(mol):
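+ # SanitizeMol either succeeds (all valences are legal) or raises a ValueError whose message contains
+ # the index of the offending atom and its valence; the regex below recovers those two integers.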
196
+ try:
197
+ Chem.SanitizeMol(mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_PROPERTIES)
198
+ return True, None
199
+ except ValueError as e:
200
+ e = str(e)
201
+ p = e.find('#')
202
+ e_sub = e[p:]
203
+ atomid_valence = list(map(int, re.findall(r'\d+', e_sub)))
204
+ return False, atomid_valence
205
+
206
+
207
+ def correct_mol(m):
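+ # Repeatedly take the atom flagged by check_valency and downgrade its highest-order non-aromatic bond
+ # by one (e.g. triple -> double, single -> removed); if all of its bonds are aromatic (RDKit bond
+ # type 12) the molecule cannot be repaired this way and (None, no_correct) is returned.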
208
+ # xsm = Chem.MolToSmiles(x, isomericSmiles=True)
209
+ mol = m
210
+
211
+ #####
212
+ no_correct = False
213
+ flag, _ = check_valency(mol)
214
+ if flag:
215
+ no_correct = True
216
+
217
+ while True:
218
+ flag, atomid_valence = check_valency(mol)
219
+ if flag:
220
+ break
221
+ else:
222
+ assert len(atomid_valence) == 2
223
+ idx = atomid_valence[0]
224
+ v = atomid_valence[1]
225
+ queue = []
226
+ check_idx = 0
227
+ for b in mol.GetAtomWithIdx(idx).GetBonds():
228
+ type = int(b.GetBondType())
229
+ queue.append((b.GetIdx(), type, b.GetBeginAtomIdx(), b.GetEndAtomIdx()))
230
+ if type == 12:
231
+ check_idx += 1
232
+ queue.sort(key=lambda tup: tup[1], reverse=True)
233
+
234
+ if queue[-1][1] == 12:
235
+ return None, no_correct
236
+ elif len(queue) > 0:
237
+ start = queue[check_idx][2]
238
+ end = queue[check_idx][3]
239
+ t = queue[check_idx][1] - 1
240
+ mol.RemoveBond(start, end)
241
+ if t >= 1:
242
+ mol.AddBond(start, end, bond_dict[t])
243
+ return mol, no_correct
244
+
245
+
246
+ def valid_mol_can_with_seg(m, largest_connected_comp=True):
247
+ if m is None:
248
+ return None
249
+ sm = Chem.MolToSmiles(m, isomericSmiles=True)
250
+ if largest_connected_comp and '.' in sm:
251
+ vsm = [(s, len(s)) for s in sm.split('.')] # 'C.CC.CCc1ccc(N)cc1CCC=O'.split('.')
252
+ vsm.sort(key=lambda tup: tup[1], reverse=True)
253
+ mol = Chem.MolFromSmiles(vsm[0][0])
254
+ else:
255
+ mol = Chem.MolFromSmiles(sm)
256
+ return mol
257
+
258
+
259
+ if __name__ == '__main__':
260
+ smiles_mol = 'C1CCC1'
261
+ print("Smiles mol %s" % smiles_mol)
262
+ chem_mol = Chem.MolFromSmiles(smiles_mol)
263
+ block_mol = Chem.MolToMolBlock(chem_mol)
264
+ print("Block mol:")
265
+ print(block_mol)
266
+
267
+ use_rdkit = True
268
+
269
+
270
+ def check_stability(atom_types, edge_types, dataset_info, debug=False,atom_decoder=None):
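+ # An atom counts as stable when its total bond order matches one of the valences listed in
+ # allowed_bonds; the molecule is stable only if every atom is stable.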
271
+ if atom_decoder is None:
272
+ atom_decoder = dataset_info.atom_decoder
273
+
274
+ n_bonds = np.zeros(len(atom_types), dtype='int')
275
+
276
+ for i in range(len(atom_types)):
277
+ for j in range(i + 1, len(atom_types)):
278
+ n_bonds[i] += abs((edge_types[i, j] + edge_types[j, i])/2)
279
+ n_bonds[j] += abs((edge_types[i, j] + edge_types[j, i])/2)
280
+ n_stable_bonds = 0
281
+ for atom_type, atom_n_bond in zip(atom_types, n_bonds):
282
+ possible_bonds = allowed_bonds[atom_decoder[atom_type]]
283
+ if type(possible_bonds) == int:
284
+ is_stable = possible_bonds == atom_n_bond
285
+ else:
286
+ is_stable = atom_n_bond in possible_bonds
287
+ if not is_stable and debug:
288
+ print("Invalid bonds for molecule %s with %d bonds" % (atom_decoder[atom_type], atom_n_bond))
289
+ n_stable_bonds += int(is_stable)
290
+
291
+ molecule_stable = n_stable_bonds == len(atom_types)
292
+ return molecule_stable, n_stable_bonds, len(atom_types)
293
+
294
+
295
+ def compute_molecular_metrics(molecule_list, train_smiles, dataset_info):
296
+ """ molecule_list: (dict) """
297
+
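+ # Each entry of molecule_list is an (atom_types, edge_types) pair of tensors, the same format
+ # consumed by BasicMolecularMetrics.evaluate below; train_smiles is only needed for novelty.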
298
+ if not dataset_info.remove_h:
299
+ print(f'Analyzing molecule stability...')
300
+
301
+ molecule_stable = 0
302
+ nr_stable_bonds = 0
303
+ n_atoms = 0
304
+ n_molecules = len(molecule_list)
305
+
306
+ for i, mol in enumerate(molecule_list):
307
+ atom_types, edge_types = mol
308
+
309
+ validity_results = check_stability(atom_types, edge_types, dataset_info)
310
+
311
+ molecule_stable += int(validity_results[0])
312
+ nr_stable_bonds += int(validity_results[1])
313
+ n_atoms += int(validity_results[2])
314
+
315
+ # Validity
316
+ fraction_mol_stable = molecule_stable / float(n_molecules)
317
+ fraction_atm_stable = nr_stable_bonds / float(n_atoms)
318
+ validity_dict = {'mol_stable': fraction_mol_stable, 'atm_stable': fraction_atm_stable}
319
+ if wandb.run:
320
+ wandb.log(validity_dict)
321
+ else:
322
+ validity_dict = {'mol_stable': -1, 'atm_stable': -1}
323
+
324
+ metrics = BasicMolecularMetrics(dataset_info, train_smiles)
325
+ rdkit_metrics = metrics.evaluate(molecule_list)
326
+ all_smiles = rdkit_metrics[-1]
327
+ if wandb.run:
328
+ nc = rdkit_metrics[-2]
329
+ dic = {'Validity': rdkit_metrics[0][0], 'Relaxed Validity': rdkit_metrics[0][1],
330
+ 'Uniqueness': rdkit_metrics[0][2], 'Novelty': rdkit_metrics[0][3],
331
+ 'nc_max': nc['nc_max'], 'nc_mu': nc['nc_mu']}
332
+ wandb.log(dic)
333
+
334
+ return validity_dict, rdkit_metrics, all_smiles
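+ # Illustrative usage only (the molecule list, training SMILES and dataset_info objects are assumed
+ # to be provided by the surrounding training/sampling code):
+ #   validity_dict, rdkit_metrics, all_smiles = compute_molecular_metrics(molecules, train_smiles, dataset_info)
+ #   validity, relaxed_validity, uniqueness, novelty = rdkit_metrics[0]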
analysis/spectre_utils.py ADDED
@@ -0,0 +1,928 @@
1
+ ###############################################################################
2
+ #
3
+ # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation
4
+ #
5
+ ###############################################################################
6
+ # import graph_tool.all as gt
7
+ ## Navigate to the ./analysis/orca directory and compile orca.cpp:
8
+ # g++ -O2 -std=c++11 -o orca orca.cpp
9
+ import os
10
+ import copy
11
+ import torch
12
+ import torch.nn as nn
13
+ import numpy as np
14
+ import networkx as nx
15
+ import subprocess as sp
16
+ import concurrent.futures
17
+
18
+ import pygsp as pg
19
+ import secrets
20
+ from string import ascii_uppercase, digits
21
+ from datetime import datetime
22
+ from scipy.linalg import eigvalsh
23
+ from scipy.stats import chi2
24
+ from analysis.dist_helper import compute_mmd, gaussian_emd, gaussian, emd, gaussian_tv, disc
25
+ from torch_geometric.utils import to_networkx
26
+ import wandb
27
+ from collections import defaultdict
28
+
29
+
30
+ PRINT_TIME = False
31
+ __all__ = ['degree_stats', 'clustering_stats', 'orbit_stats_all', 'spectral_stats', 'eval_acc_lobster_graph']
32
+
33
+
34
+ def degree_worker(G):
35
+ return np.array(nx.degree_histogram(G))
36
+
37
+
38
+ def degree_stats(graph_ref_list, graph_pred_list, is_parallel=True, compute_emd=False):
39
+ ''' Compute the distance between the degree distributions of two unordered sets of graphs.
40
+ Args:
41
+ graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated
42
+ '''
43
+ sample_ref = []
44
+ sample_pred = []
45
+ # in case an empty graph is generated
46
+ graph_pred_list_remove_empty = [
47
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
48
+ ]
49
+
50
+ prev = datetime.now()
51
+ if is_parallel:
52
+ with concurrent.futures.ThreadPoolExecutor() as executor:
53
+ for deg_hist in executor.map(degree_worker, graph_ref_list):
54
+ sample_ref.append(deg_hist)
55
+ with concurrent.futures.ThreadPoolExecutor() as executor:
56
+ for deg_hist in executor.map(degree_worker, graph_pred_list_remove_empty):
57
+ sample_pred.append(deg_hist)
58
+ else:
59
+ for i in range(len(graph_ref_list)):
60
+ degree_temp = np.array(nx.degree_histogram(graph_ref_list[i]))
61
+ sample_ref.append(degree_temp)
62
+ for i in range(len(graph_pred_list_remove_empty)):
63
+ degree_temp = np.array(
64
+ nx.degree_histogram(graph_pred_list_remove_empty[i]))
65
+ sample_pred.append(degree_temp)
66
+
67
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
68
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
69
+ if compute_emd:
70
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
71
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
72
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
73
+ else:
74
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
75
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
76
+
77
+ elapsed = datetime.now() - prev
78
+ if PRINT_TIME:
79
+ print('Time computing degree mmd: ', elapsed)
80
+ return mmd_dist
81
+
82
+
83
+ ###############################################################################
84
+
85
+ def spectral_worker(G, n_eigvals=-1):
86
+ # eigs = nx.laplacian_spectrum(G)
87
+ try:
88
+ eigs = eigvalsh(nx.normalized_laplacian_matrix(G).todense())
89
+ except:
90
+ eigs = np.zeros(G.number_of_nodes())
91
+ if n_eigvals > 0:
92
+ eigs = eigs[1:n_eigvals + 1]
93
+ spectral_pmf, _ = np.histogram(eigs, bins=200, range=(-1e-5, 2), density=False)
94
+ spectral_pmf = spectral_pmf / spectral_pmf.sum()
95
+ return spectral_pmf
96
+
97
+
98
+ def get_spectral_pmf(eigs, max_eig):
99
+ spectral_pmf, _ = np.histogram(np.clip(eigs, 0, max_eig), bins=200, range=(-1e-5, max_eig), density=False)
100
+ spectral_pmf = spectral_pmf / spectral_pmf.sum()
101
+ return spectral_pmf
102
+
103
+
104
+ def eigval_stats(eig_ref_list, eig_pred_list, max_eig=20, is_parallel=True, compute_emd=False):
105
+ ''' Compute the distance between the eigenvalue distributions of two unordered sets of graphs.
106
+ Args:
107
+ eig_ref_list, eig_pred_list: two lists of eigenvalue arrays to be evaluated
108
+ '''
109
+ sample_ref = []
110
+ sample_pred = []
111
+
112
+ prev = datetime.now()
113
+ if is_parallel:
114
+ with concurrent.futures.ThreadPoolExecutor() as executor:
115
+ for spectral_density in executor.map(get_spectral_pmf, eig_ref_list,
116
+ [max_eig for i in range(len(eig_ref_list))]):
117
+ sample_ref.append(spectral_density)
118
+ with concurrent.futures.ThreadPoolExecutor() as executor:
119
+ for spectral_density in executor.map(get_spectral_pmf, eig_pred_list,
120
+ [max_eig for i in range(len(eig_ref_list))]):
121
+ sample_pred.append(spectral_density)
122
+ else:
123
+ for i in range(len(eig_ref_list)):
124
+ spectral_temp = get_spectral_pmf(eig_ref_list[i], max_eig)
125
+ sample_ref.append(spectral_temp)
126
+ for i in range(len(eig_pred_list)):
127
+ spectral_temp = get_spectral_pmf(eig_pred_list[i], max_eig)
128
+ sample_pred.append(spectral_temp)
129
+
130
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
131
+ if compute_emd:
132
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
133
+ else:
134
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
135
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
136
+
137
+ elapsed = datetime.now() - prev
138
+ if PRINT_TIME:
139
+ print('Time computing eig mmd: ', elapsed)
140
+ return mmd_dist
141
+
142
+
143
+ def eigh_worker(G):
144
+ L = nx.normalized_laplacian_matrix(G).todense()
145
+ try:
146
+ eigvals, eigvecs = np.linalg.eigh(L)
147
+ except:
148
+ eigvals = np.zeros(L[0, :].shape)
149
+ eigvecs = np.zeros(L.shape)
150
+ return (eigvals, eigvecs)
151
+
152
+
153
+ def compute_list_eigh(graph_list, is_parallel=False):
154
+ eigval_list = []
155
+ eigvec_list = []
156
+ if is_parallel:
157
+ with concurrent.futures.ThreadPoolExecutor() as executor:
158
+ for e_U in executor.map(eigh_worker, graph_list):
159
+ eigval_list.append(e_U[0])
160
+ eigvec_list.append(e_U[1])
161
+ else:
162
+ for i in range(len(graph_list)):
163
+ e_U = eigh_worker(graph_list[i])
164
+ eigval_list.append(e_U[0])
165
+ eigvec_list.append(e_U[1])
166
+ return eigval_list, eigvec_list
167
+
168
+
169
+ def get_spectral_filter_worker(eigvec, eigval, filters, bound=1.4):
170
+ ges = filters.evaluate(eigval)
171
+ linop = []
172
+ for ge in ges:
173
+ linop.append(eigvec @ np.diag(ge) @ eigvec.T)
174
+ linop = np.array(linop)
175
+ norm_filt = np.sum(linop ** 2, axis=2)
176
+ hist_range = [0, bound]
177
+ hist = np.array([np.histogram(x, range=hist_range, bins=100)[0] for x in norm_filt]) # NOTE: change number of bins
178
+ return hist.flatten()
179
+
180
+
181
+ def spectral_filter_stats(eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list, is_parallel=False,
182
+ compute_emd=False):
183
+ ''' Compute the distance between the spectral filter (graph wavelet) statistics of two sets of graphs.
184
+ Args:
185
+ eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list: eigendecompositions of the reference and predicted graphs
186
+ '''
187
+ prev = datetime.now()
188
+
189
+ class DMG(object):
190
+ """Dummy Normalized Graph"""
191
+ lmax = 2
192
+
193
+ n_filters = 12
194
+ filters = pg.filters.Abspline(DMG, n_filters)
195
+ bound = np.max(filters.evaluate(np.arange(0, 2, 0.01)))
196
+ sample_ref = []
197
+ sample_pred = []
198
+ if is_parallel:
199
+ with concurrent.futures.ThreadPoolExecutor() as executor:
200
+ for spectral_density in executor.map(get_spectral_filter_worker, eigvec_ref_list, eigval_ref_list,
201
+ [filters for i in range(len(eigval_ref_list))],
202
+ [bound for i in range(len(eigval_ref_list))]):
203
+ sample_ref.append(spectral_density)
204
+ with concurrent.futures.ThreadPoolExecutor() as executor:
205
+ for spectral_density in executor.map(get_spectral_filter_worker, eigvec_pred_list, eigval_pred_list,
206
+ [filters for i in range(len(eigval_ref_list))],
207
+ [bound for i in range(len(eigval_ref_list))]):
208
+ sample_pred.append(spectral_density)
209
+ else:
210
+ for i in range(len(eigval_ref_list)):
211
+ try:
212
+ spectral_temp = get_spectral_filter_worker(eigvec_ref_list[i], eigval_ref_list[i], filters, bound)
213
+ sample_ref.append(spectral_temp)
214
+ except:
215
+ pass
216
+ for i in range(len(eigval_pred_list)):
217
+ try:
218
+ spectral_temp = get_spectral_filter_worker(eigvec_pred_list[i], eigval_pred_list[i], filters, bound)
219
+ sample_pred.append(spectral_temp)
220
+ except:
221
+ pass
222
+
223
+ if compute_emd:
224
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
225
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
226
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
227
+ else:
228
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
229
+
230
+ elapsed = datetime.now() - prev
231
+ if PRINT_TIME:
232
+ print('Time computing spectral filter stats: ', elapsed)
233
+ return mmd_dist
234
+
235
+
236
+ def spectral_stats(graph_ref_list, graph_pred_list, is_parallel=True, n_eigvals=-1, compute_emd=False):
237
+ ''' Compute the distance between the Laplacian spectrum distributions of two unordered sets of graphs.
238
+ Args:
239
+ graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated
240
+ '''
241
+ sample_ref = []
242
+ sample_pred = []
243
+ # in case an empty graph is generated
244
+ graph_pred_list_remove_empty = [
245
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
246
+ ]
247
+
248
+ prev = datetime.now()
249
+ if is_parallel:
250
+ with concurrent.futures.ThreadPoolExecutor() as executor:
251
+ for spectral_density in executor.map(spectral_worker, graph_ref_list, [n_eigvals for i in graph_ref_list]):
252
+ sample_ref.append(spectral_density)
253
+ with concurrent.futures.ThreadPoolExecutor() as executor:
254
+ for spectral_density in executor.map(spectral_worker, graph_pred_list_remove_empty,
255
+ [n_eigvals for i in graph_ref_list]):
256
+ sample_pred.append(spectral_density)
257
+ else:
258
+ for i in range(len(graph_ref_list)):
259
+ spectral_temp = spectral_worker(graph_ref_list[i], n_eigvals)
260
+ sample_ref.append(spectral_temp)
261
+ for i in range(len(graph_pred_list_remove_empty)):
262
+ spectral_temp = spectral_worker(graph_pred_list_remove_empty[i], n_eigvals)
263
+ sample_pred.append(spectral_temp)
264
+
265
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
266
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
267
+ if compute_emd:
268
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
269
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
270
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
271
+ else:
272
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
273
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
274
+
275
+ elapsed = datetime.now() - prev
276
+ if PRINT_TIME:
277
+ print('Time computing spectral mmd: ', elapsed)
278
+ return mmd_dist
279
+
280
+
281
+ ###############################################################################
282
+
283
+ def clustering_worker(param):
284
+ G, bins = param
285
+ clustering_coeffs_list = list(nx.clustering(G).values())
286
+ hist, _ = np.histogram(
287
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
288
+ return hist
289
+
290
+
291
+ def clustering_stats(graph_ref_list,
292
+ graph_pred_list,
293
+ bins=100,
294
+ is_parallel=True, compute_emd=False):
295
+ sample_ref = []
296
+ sample_pred = []
297
+ graph_pred_list_remove_empty = [
298
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
299
+ ]
300
+
301
+ prev = datetime.now()
302
+ if is_parallel:
303
+ with concurrent.futures.ThreadPoolExecutor() as executor:
304
+ for clustering_hist in executor.map(clustering_worker,
305
+ [(G, bins) for G in graph_ref_list]):
306
+ sample_ref.append(clustering_hist)
307
+ with concurrent.futures.ThreadPoolExecutor() as executor:
308
+ for clustering_hist in executor.map(
309
+ clustering_worker, [(G, bins) for G in graph_pred_list_remove_empty]):
310
+ sample_pred.append(clustering_hist)
311
+
312
+ # check non-zero elements in hist
313
+ # total = 0
314
+ # for i in range(len(sample_pred)):
315
+ # nz = np.nonzero(sample_pred[i])[0].shape[0]
316
+ # total += nz
317
+ # print(total)
318
+ else:
319
+ for i in range(len(graph_ref_list)):
320
+ clustering_coeffs_list = list(nx.clustering(graph_ref_list[i]).values())
321
+ hist, _ = np.histogram(
322
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
323
+ sample_ref.append(hist)
324
+
325
+ for i in range(len(graph_pred_list_remove_empty)):
326
+ clustering_coeffs_list = list(
327
+ nx.clustering(graph_pred_list_remove_empty[i]).values())
328
+ hist, _ = np.histogram(
329
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
330
+ sample_pred.append(hist)
331
+
332
+ if compute_emd:
333
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
334
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd, sigma=1.0 / 10)
335
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd, sigma=1.0 / 10, distance_scaling=bins)
336
+ else:
337
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv, sigma=1.0 / 10)
338
+
339
+ elapsed = datetime.now() - prev
340
+ if PRINT_TIME:
341
+ print('Time computing clustering mmd: ', elapsed)
342
+ return mmd_dist
343
+
344
+
345
+ # maps motif/orbit name string to its corresponding list of indices from orca output
346
+ motif_to_indices = {
347
+ '3path': [1, 2],
348
+ '4cycle': [8],
349
+ }
350
+ COUNT_START_STR = 'orbit counts:'
351
+
352
+
353
+ def edge_list_reindexed(G):
354
+ idx = 0
355
+ id2idx = dict()
356
+ for u in G.nodes():
357
+ id2idx[str(u)] = idx
358
+ idx += 1
359
+
360
+ edges = []
361
+ for (u, v) in G.edges():
362
+ edges.append((id2idx[str(u)], id2idx[str(v)]))
363
+ return edges
364
+
365
+
366
+ def orca(graph):
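+ # Write the graph as "<n_nodes> <n_edges>" followed by one edge per line to a temporary file, run the
+ # compiled orca binary ("orca node 4 <tmp_file> std") and parse the per-node orbit counts that follow
+ # the "orbit counts:" marker in its output.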
367
+ # tmp_fname = f'analysis/orca/tmp_{"".join(secrets.choice(ascii_uppercase + digits) for i in range(8))}.txt'
368
+ tmp_fname = f'orca/tmp_{"".join(secrets.choice(ascii_uppercase + digits) for i in range(8))}.txt'
369
+ tmp_fname = os.path.join(os.path.dirname(os.path.realpath(__file__)), tmp_fname)
370
+ # print(tmp_fname, flush=True)
371
+ f = open(tmp_fname, 'w')
372
+ f.write(
373
+ str(graph.number_of_nodes()) + ' ' + str(graph.number_of_edges()) + '\n')
374
+ for (u, v) in edge_list_reindexed(graph):
375
+ f.write(str(u) + ' ' + str(v) + '\n')
376
+ f.close()
377
+ output = sp.check_output(
378
+ [str(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'orca/orca')), 'node', '4', tmp_fname, 'std'])
379
+ output = output.decode('utf8').strip()
380
+ idx = output.find(COUNT_START_STR) + len(COUNT_START_STR) + 2
381
+ output = output[idx:]
382
+ node_orbit_counts = np.array([
383
+ list(map(int,
384
+ node_cnts.strip().split(' ')))
385
+ for node_cnts in output.strip('\n').split('\n')
386
+ ])
387
+
388
+ try:
389
+ os.remove(tmp_fname)
390
+ except OSError:
391
+ pass
392
+
393
+ return node_orbit_counts
394
+
395
+
396
+ def motif_stats(graph_ref_list, graph_pred_list, motif_type='4cycle', ground_truth_match=None,
397
+ bins=100, compute_emd=False):
398
+ # graph motif counts (int for each graph)
399
+ # normalized by graph size
400
+ total_counts_ref = []
401
+ total_counts_pred = []
402
+
403
+ num_matches_ref = []
404
+ num_matches_pred = []
405
+
406
+ graph_pred_list_remove_empty = [G for G in graph_pred_list if not G.number_of_nodes() == 0]
407
+ indices = motif_to_indices[motif_type]
408
+
409
+ for G in graph_ref_list:
410
+ orbit_counts = orca(G)
411
+ motif_counts = np.sum(orbit_counts[:, indices], axis=1)
412
+
413
+ if ground_truth_match is not None:
414
+ match_cnt = 0
415
+ for elem in motif_counts:
416
+ if elem == ground_truth_match:
417
+ match_cnt += 1
418
+ num_matches_ref.append(match_cnt / G.number_of_nodes())
419
+
420
+ # hist, _ = np.histogram(
421
+ # motif_counts, bins=bins, density=False)
422
+ motif_temp = np.sum(motif_counts) / G.number_of_nodes()
423
+ total_counts_ref.append(motif_temp)
424
+
425
+ for G in graph_pred_list_remove_empty:
426
+ orbit_counts = orca(G)
427
+ motif_counts = np.sum(orbit_counts[:, indices], axis=1)
428
+
429
+ if ground_truth_match is not None:
430
+ match_cnt = 0
431
+ for elem in motif_counts:
432
+ if elem == ground_truth_match:
433
+ match_cnt += 1
434
+ num_matches_pred.append(match_cnt / G.number_of_nodes())
435
+
436
+ motif_temp = np.sum(motif_counts) / G.number_of_nodes()
437
+ total_counts_pred.append(motif_temp)
438
+
439
+ total_counts_ref = np.array(total_counts_ref)[:, None]
440
+ total_counts_pred = np.array(total_counts_pred)[:, None]
441
+
442
+
443
+ if compute_emd:
444
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
445
+ # mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=emd, is_hist=False)
446
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
447
+ else:
448
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
449
+ return mmd_dist
450
+
451
+
452
+ def orbit_stats_all(graph_ref_list, graph_pred_list, compute_emd=False):
453
+ total_counts_ref = []
454
+ total_counts_pred = []
455
+
456
+ graph_pred_list_remove_empty = [
457
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
458
+ ]
459
+
460
+ for G in graph_ref_list:
461
+ orbit_counts = orca(G)
462
+ orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
463
+ total_counts_ref.append(orbit_counts_graph)
464
+
465
+ for G in graph_pred_list:
466
+ orbit_counts = orca(G)
467
+ orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
468
+ total_counts_pred.append(orbit_counts_graph)
469
+
470
+ total_counts_ref = np.array(total_counts_ref)
471
+ total_counts_pred = np.array(total_counts_pred)
472
+
473
+ # mmd_dist = compute_mmd(
474
+ # total_counts_ref,
475
+ # total_counts_pred,
476
+ # kernel=gaussian,
477
+ # is_hist=False,
478
+ # sigma=30.0)
479
+
480
+ # mmd_dist = compute_mmd(
481
+ # total_counts_ref,
482
+ # total_counts_pred,
483
+ # kernel=gaussian_tv,
484
+ # is_hist=False,
485
+ # sigma=30.0)
486
+
487
+ if compute_emd:
488
+ # mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=emd, sigma=30.0)
489
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
490
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False, sigma=30.0)
491
+ else:
492
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian_tv, is_hist=False, sigma=30.0)
493
+ return mmd_dist
494
+
495
+
496
+ def eval_acc_lobster_graph(G_list):
497
+ G_list = [copy.deepcopy(gg) for gg in G_list]
498
+ count = 0
499
+ for gg in G_list:
500
+ if is_lobster_graph(gg):
501
+ count += 1
502
+ return count / float(len(G_list))
503
+
504
+
505
+ def eval_acc_tree_graph(G_list):
506
+ count = 0
507
+ for gg in G_list:
508
+ if nx.is_tree(gg):
509
+ count += 1
510
+ return count / float(len(G_list))
511
+
512
+
513
+ def eval_acc_grid_graph(G_list, grid_start=10, grid_end=20):
514
+ count = 0
515
+ for gg in G_list:
516
+ if is_grid_graph(gg):
517
+ count += 1
518
+ return count / float(len(G_list))
519
+
520
+
521
+ def eval_acc_sbm_graph(G_list, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000, is_parallel=True):
522
+ count = 0.0
523
+ if is_parallel:
524
+ with concurrent.futures.ThreadPoolExecutor() as executor:
525
+ for prob in executor.map(is_sbm_graph,
526
+ [gg for gg in G_list], [p_intra for i in range(len(G_list))],
527
+ [p_inter for i in range(len(G_list))],
528
+ [strict for i in range(len(G_list))],
529
+ [refinement_steps for i in range(len(G_list))]):
530
+ count += prob
531
+ else:
532
+ for gg in G_list:
533
+ count += is_sbm_graph(gg, p_intra=p_intra, p_inter=p_inter, strict=strict,
534
+ refinement_steps=refinement_steps)
535
+ return count / float(len(G_list))
536
+
537
+
538
+ def eval_acc_planar_graph(G_list):
539
+ count = 0
540
+ for gg in G_list:
541
+ if is_planar_graph(gg):
542
+ count += 1
543
+ return count / float(len(G_list))
544
+
545
+
546
+ def is_planar_graph(G):
547
+ return nx.is_connected(G) and nx.check_planarity(G)[0]
548
+
549
+
550
+ def is_lobster_graph(G):
551
+ """
552
+ Check a given graph is a lobster graph or not
553
+
554
+ Removing leaf nodes twice:
555
+
556
+ lobster -> caterpillar -> path
557
+
558
+ """
559
+ ### Check if G is a tree
560
+ if nx.is_tree(G):
561
+ G = G.copy()
562
+ ### Check if G is a path after removing leaves twice
563
+ leaves = [n for n, d in G.degree() if d == 1]
564
+ G.remove_nodes_from(leaves)
565
+
566
+ leaves = [n for n, d in G.degree() if d == 1]
567
+ G.remove_nodes_from(leaves)
568
+
569
+ num_nodes = len(G.nodes())
570
+ num_degree_one = [d for n, d in G.degree() if d == 1]
571
+ num_degree_two = [d for n, d in G.degree() if d == 2]
572
+
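+ # After pruning leaves twice, the remainder must be a simple path: either exactly two nodes of
+ # degree 1 with every other node of degree 2, or (for very small graphs) no nodes of degree 1 or 2 at all.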
573
+ if sum(num_degree_one) == 2 and sum(num_degree_two) == 2 * (num_nodes - 2):
574
+ return True
575
+ elif sum(num_degree_one) == 0 and sum(num_degree_two) == 0:
576
+ return True
577
+ else:
578
+ return False
579
+ else:
580
+ return False
581
+
582
+
583
+ def is_grid_graph(G):
584
+ """
585
+ Check if the graph is grid, by comparing with all the real grids with the same node count
586
+ """
587
+ all_grid_file = f"data/all_grids.pt"
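+ # All 2x2 up to 19x19 grid graphs are generated once, grouped by node count and cached in data/all_grids.pt.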
588
+ if os.path.isfile(all_grid_file):
589
+ all_grids = torch.load(all_grid_file)
590
+ else:
591
+ all_grids = {}
592
+ for i in range(2, 20):
593
+ for j in range(2, 20):
594
+ G_grid = nx.grid_2d_graph(i, j)
595
+ n_nodes = f"{len(G_grid.nodes())}"
596
+ all_grids[n_nodes] = all_grids.get(n_nodes, []) + [G_grid]
597
+ torch.save(all_grids, all_grid_file)
598
+
599
+ n_nodes = f"{len(G.nodes())}"
600
+ if n_nodes in all_grids:
601
+ for G_grid in all_grids[n_nodes]:
602
+ if nx.faster_could_be_isomorphic(G, G_grid):
603
+ if nx.is_isomorphic(G, G_grid):
604
+ return True
605
+ return False
606
+ else:
607
+ return False
608
+
609
+
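+ # NOTE: is_sbm_graph below is commented out because it relies on graph_tool (see the commented import at
+ # the top of this file); the 'sbm' metric path (eval_acc_sbm_graph and the is_sbm_graph reference further
+ # down) therefore requires graph_tool to be installed and this block re-enabled.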
610
+ # def is_sbm_graph(G, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000):
611
+ # """
612
+ # Check if how closely given graph matches a SBM with given probabilites by computing mean probability of Wald test statistic for each recovered parameter
613
+ # """
614
+
615
+ # adj = nx.adjacency_matrix(G).toarray()
616
+ # idx = adj.nonzero()
617
+ # g = gt.Graph()
618
+ # g.add_edge_list(np.transpose(idx))
619
+ # try:
620
+ # state = gt.minimize_blockmodel_dl(g)
621
+ # except ValueError:
622
+ # if strict:
623
+ # return False
624
+ # else:
625
+ # return 0.0
626
+
627
+ # # Refine using merge-split MCMC
628
+ # for i in range(refinement_steps):
629
+ # state.multiflip_mcmc_sweep(beta=np.inf, niter=10)
630
+
631
+ # b = state.get_blocks()
632
+ # b = gt.contiguous_map(state.get_blocks())
633
+ # state = state.copy(b=b)
634
+ # e = state.get_matrix()
635
+ # n_blocks = state.get_nonempty_B()
636
+ # node_counts = state.get_nr().get_array()[:n_blocks]
637
+ # edge_counts = e.todense()[:n_blocks, :n_blocks]
638
+ # if strict:
639
+ # if (node_counts > 40).sum() > 0 or (node_counts < 20).sum() > 0 or n_blocks > 5 or n_blocks < 2:
640
+ # return False
641
+
642
+ # max_intra_edges = node_counts * (node_counts - 1)
643
+ # est_p_intra = np.diagonal(edge_counts) / (max_intra_edges + 1e-6)
644
+
645
+ # max_inter_edges = node_counts.reshape((-1, 1)) @ node_counts.reshape((1, -1))
646
+ # np.fill_diagonal(edge_counts, 0)
647
+ # est_p_inter = edge_counts / (max_inter_edges + 1e-6)
648
+
649
+ # W_p_intra = (est_p_intra - p_intra) ** 2 / (est_p_intra * (1 - est_p_intra) + 1e-6)
650
+ # W_p_inter = (est_p_inter - p_inter) ** 2 / (est_p_inter * (1 - est_p_inter) + 1e-6)
651
+
652
+ # W = W_p_inter.copy()
653
+ # np.fill_diagonal(W, W_p_intra)
654
+ # p = 1 - chi2.cdf(abs(W), 1)
655
+ # p = p.mean()
656
+ # if strict:
657
+ # return p > 0.9 # p value < 10 %
658
+ # else:
659
+ # return p
660
+
661
+
662
+ def eval_fraction_isomorphic(fake_graphs, train_graphs):
663
+ count = 0
664
+ for fake_g in fake_graphs:
665
+ for train_g in train_graphs:
666
+ if nx.faster_could_be_isomorphic(fake_g, train_g):
667
+ if nx.is_isomorphic(fake_g, train_g):
668
+ count += 1
669
+ break
670
+ return count / float(len(fake_graphs))
671
+
672
+
673
+ def eval_fraction_unique(fake_graphs, precise=False):
674
+ count_non_unique = 0
675
+ fake_evaluated = []
676
+ for fake_g in fake_graphs:
677
+ unique = True
678
+ if not fake_g.number_of_nodes() == 0:
679
+ for fake_old in fake_evaluated:
680
+ if precise:
681
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
682
+ if nx.is_isomorphic(fake_g, fake_old):
683
+ count_non_unique += 1
684
+ unique = False
685
+ break
686
+ else:
687
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
688
+ if nx.could_be_isomorphic(fake_g, fake_old):
689
+ count_non_unique += 1
690
+ unique = False
691
+ break
692
+ if unique:
693
+ fake_evaluated.append(fake_g)
694
+
695
+ frac_unique = (float(len(fake_graphs)) - count_non_unique) / float(
696
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs
697
+
698
+ return frac_unique
699
+
700
+
701
+ def eval_fraction_unique_non_isomorphic_valid(fake_graphs, train_graphs, validity_func=(lambda x: True)):
702
+ count_valid = 0
703
+ count_isomorphic = 0
704
+ count_non_unique = 0
705
+ fake_evaluated = []
706
+ for fake_g in fake_graphs:
707
+ unique = True
708
+
709
+ for fake_old in fake_evaluated:
710
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
711
+ if nx.is_isomorphic(fake_g, fake_old):
712
+ count_non_unique += 1
713
+ unique = False
714
+ break
715
+ if unique:
716
+ fake_evaluated.append(fake_g)
717
+ non_isomorphic = True
718
+ for train_g in train_graphs:
719
+ if nx.faster_could_be_isomorphic(fake_g, train_g):
720
+ if nx.is_isomorphic(fake_g, train_g):
721
+ count_isomorphic += 1
722
+ non_isomorphic = False
723
+ break
724
+ if non_isomorphic:
725
+ if validity_func(fake_g):
726
+ count_valid += 1
727
+
728
+ frac_unique = (float(len(fake_graphs)) - count_non_unique) / float(
729
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs
730
+ frac_unique_non_isomorphic = (float(len(fake_graphs)) - count_non_unique - count_isomorphic) / float(
731
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set
732
+ frac_unique_non_isomorphic_valid = count_valid / float(
733
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set and are valid
734
+ return frac_unique, frac_unique_non_isomorphic, frac_unique_non_isomorphic_valid
735
+
736
+
737
+ class SpectreSamplingMetrics(nn.Module):
738
+ def __init__(self, data_loaders, compute_emd, metrics_list):
739
+ super().__init__()
740
+
741
+ self.train_graphs = self.loader_to_nx(data_loaders['train'])
742
+ self.val_graphs = self.loader_to_nx(data_loaders['val'])
743
+ self.test_graphs = self.loader_to_nx(data_loaders['test'])
744
+ self.num_graphs_test = len(self.test_graphs)
745
+ self.num_graphs_val = len(self.val_graphs)
746
+ self.compute_emd = compute_emd
747
+ self.metrics_list = metrics_list
748
+
749
+ def loader_to_nx(self, loader):
750
+ networkx_graphs = []
751
+ for i, batch in enumerate(loader):
752
+ data_list = batch.to_data_list()
753
+ for j, data in enumerate(data_list):
754
+ networkx_graphs.append(to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True,
755
+ remove_self_loops=True))
756
+ return networkx_graphs
757
+
758
+ def forward(self, generated_graphs: list, local_rank, test=False):
759
+ reference_graphs = self.test_graphs if test else self.val_graphs
760
+ if local_rank == 0:
761
+ print(f"Computing sampling metrics between {len(generated_graphs)} generated graphs and {len(reference_graphs)}"
762
+ f" test graphs -- emd computation: {self.compute_emd}")
763
+ networkx_graphs = []
764
+ adjacency_matrices = []
765
+ if local_rank == 0:
766
+ print("Building networkx graphs...")
767
+ for graph in generated_graphs:
768
+ node_types, edge_types = graph
769
+ A = edge_types.bool().cpu().numpy()
770
+ adjacency_matrices.append(A)
771
+
772
+ nx_graph = nx.from_numpy_array(A)
773
+ networkx_graphs.append(nx_graph)
774
+
775
+ np.savez('generated_adjs.npz', *adjacency_matrices)
776
+
777
+ to_log = {}
778
+ if 'degree' in self.metrics_list:
779
+ if local_rank == 0:
780
+ print("Computing degree stats..")
781
+ degree = degree_stats(reference_graphs, networkx_graphs, is_parallel=True,
782
+ compute_emd=self.compute_emd)
783
+
784
+ to_log['degree'] = degree
785
+
786
+ if wandb.run:
787
+ wandb.run.summary['degree'] = degree
788
+
789
+ # val_eigvals = [graph["eigval"][1:self.k + 1].cpu().detach().numpy() for graph in self.val]
790
+ # train_eigvals = [graph["eigval"][1:self.k + 1].cpu().detach().numpy() for graph in self.train]
791
+
792
+ # eigval_stats(eig_ref_list, eig_pred_list, max_eig=20, is_parallel=True, compute_emd=False)
793
+ # spectral_filter_stats(eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list, is_parallel=False,
794
+ # compute_emd=False) # This is the one called wavelet
795
+
796
+
797
+ if 'spectre' in self.metrics_list:
798
+ if local_rank == 0:
799
+ print("Computing spectre stats...")
800
+ spectre = spectral_stats(reference_graphs, networkx_graphs, is_parallel=True, n_eigvals=-1,
801
+ compute_emd=self.compute_emd)
802
+
803
+ to_log['spectre'] = spectre
804
+ if wandb.run:
805
+ wandb.run.summary['spectre'] = spectre
806
+
807
+ if 'clustering' in self.metrics_list:
808
+ if local_rank == 0:
809
+ print("Computing clustering stats...")
810
+ clustering = clustering_stats(reference_graphs, networkx_graphs, bins=100, is_parallel=True,
811
+ compute_emd=self.compute_emd)
812
+ to_log['clustering'] = clustering
813
+ if wandb.run:
814
+ wandb.run.summary['clustering'] = clustering
815
+
816
+ if 'motif' in self.metrics_list:
817
+ if local_rank == 0:
818
+ print("Computing motif stats")
819
+ motif = motif_stats(reference_graphs, networkx_graphs, motif_type='4cycle', ground_truth_match=None, bins=100,
820
+ compute_emd=self.compute_emd)
821
+ to_log['motif'] = motif
822
+ if wandb.run:
823
+ wandb.run.summary['motif'] = motif
824
+
825
+ if 'orbit' in self.metrics_list:
826
+ if local_rank == 0:
827
+ print("Computing orbit stats...")
828
+ orbit = orbit_stats_all(reference_graphs, networkx_graphs, compute_emd=self.compute_emd)
829
+ to_log['orbit'] = orbit
830
+ if wandb.run:
831
+ wandb.run.summary['orbit'] = orbit
832
+
833
+ if 'sbm' in self.metrics_list:
834
+ if local_rank == 0:
835
+ print("Computing accuracy...")
836
+ acc = eval_acc_sbm_graph(networkx_graphs, refinement_steps=100, strict=True)
837
+ to_log['sbm_acc'] = acc
838
+ if wandb.run:
839
+ wandb.run.summary['sbmacc'] = acc
840
+
841
+ if 'planar' in self.metrics_list:
842
+ if local_rank ==0:
843
+ print('Computing planar accuracy...')
844
+ planar_acc = eval_acc_planar_graph(networkx_graphs)
845
+ to_log['planar_acc'] = planar_acc
846
+ if wandb.run:
847
+ wandb.run.summary['planar_acc'] = planar_acc
848
+
849
+ if 'sbm' in self.metrics_list or 'planar' in self.metrics_list:
850
+ if local_rank == 0:
851
+ print("Computing all fractions...")
852
+ frac_unique, frac_unique_non_isomorphic, fraction_unique_non_isomorphic_valid = eval_fraction_unique_non_isomorphic_valid(
853
+ networkx_graphs, self.train_graphs, is_sbm_graph if 'sbm' in self.metrics_list else is_planar_graph)
854
+ frac_non_isomorphic = 1.0 - eval_fraction_isomorphic(networkx_graphs, self.train_graphs)
855
+ to_log.update({'sampling/frac_unique': frac_unique,
856
+ 'sampling/frac_unique_non_iso': frac_unique_non_isomorphic,
857
+ 'sampling/frac_unic_non_iso_valid': fraction_unique_non_isomorphic_valid,
858
+ 'sampling/frac_non_iso': frac_non_isomorphic})
859
+
860
+ if local_rank == 0:
861
+ print("Sampling statistics", to_log)
862
+ if wandb.run:
863
+ wandb.log(to_log, commit=False)
864
+
865
+ def reset(self):
866
+ pass
867
+
868
+
869
+ def loader_to_nx(loader):
870
+ networkx_graphs = {}
871
+ for i, batch in enumerate(loader):
872
+ data_list = batch.to_data_list()
873
+ for j, data in enumerate(data_list):
874
+ networkx_graphs[data.prompt_id.squeeze(0).item()] = [to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True, remove_self_loops=True)]
875
+
876
+ return networkx_graphs
877
+
878
+ def compute_metrics(generated_graphs, referenced_graphs):
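+ # Flattens the per-prompt dictionaries of generated and reference graphs into flat lists; as written it
+ # only converts the generated graphs to networkx and returns them, without computing any distances yet.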
879
+ networkx_graphs = defaultdict(list)
880
+ adjacency_matrices = defaultdict(list)
881
+ for key in generated_graphs:
882
+ for graph in generated_graphs[key]:
883
+ node_types, edge_types = graph
884
+ A = edge_types.bool().cpu().numpy()
885
+ nx_graph = nx.from_numpy_array(A)
886
+
887
+ networkx_graphs[key].append(nx_graph)
888
+ adjacency_matrices[key].append(A)
889
+
890
+ new_referenced_graphs = []
891
+ for key in referenced_graphs:
892
+ new_referenced_graphs.extend(referenced_graphs[key])
893
+ referenced_graphs = new_referenced_graphs
894
+
895
+ nx_graphs = []
896
+ for key in networkx_graphs:
897
+ nx_graphs.extend(networkx_graphs[key])
898
+
899
+ return nx_graphs
900
+
901
+
902
+
903
+
904
+ class Comm20SamplingMetrics(SpectreSamplingMetrics):
905
+ def __init__(self, data_loaders):
906
+ super().__init__(data_loaders=data_loaders,
907
+ compute_emd=True,
908
+ metrics_list=['degree', 'clustering', 'orbit'])
909
+
910
+
911
+ class PlanarSamplingMetrics(SpectreSamplingMetrics):
912
+ def __init__(self, data_loaders):
913
+ super().__init__(data_loaders=data_loaders,
914
+ compute_emd=False,
915
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'planar'])
916
+
917
+
918
+ class SBMSamplingMetrics(SpectreSamplingMetrics):
919
+ def __init__(self, data_loaders):
920
+ super().__init__(data_loaders=data_loaders,
921
+ compute_emd=False,
922
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'sbm'])
923
+
924
+ class CrossDomainSamplingMetrics(SpectreSamplingMetrics):
925
+ def __init__(self, data_loaders):
926
+ super().__init__(data_loaders=data_loaders,
927
+ compute_emd=False,
928
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre'])
analysis/visualization.py ADDED
@@ -0,0 +1,221 @@
1
+ import os
2
+
3
+ from rdkit import Chem
4
+ from rdkit.Chem import Draw, AllChem
5
+ from rdkit.Geometry import Point3D
6
+ from rdkit import RDLogger
7
+ import imageio
8
+ import networkx as nx
9
+ import numpy as np
10
+ import rdkit.Chem
11
+ import wandb
12
+ import matplotlib.pyplot as plt
13
+
14
+
15
+
16
+
17
+
18
+ class MolecularVisualization:
19
+ def __init__(self, remove_h, dataset_infos):
20
+ self.remove_h = remove_h
21
+ self.dataset_infos = dataset_infos
22
+
23
+ def mol_from_graphs(self, node_list, adjacency_matrix):
24
+ """
25
+ Convert graphs to rdkit molecules
26
+ node_list: the nodes of a batch of nodes (bs x n)
27
+ adjacency_matrix: the adjacency_matrix of the molecule (bs x n x n)
28
+ """
29
+ # dictionary to map integer value to the char of atom
30
+ atom_decoder = self.dataset_infos.atom_decoder
31
+
32
+ # create empty editable mol object
33
+ mol = Chem.RWMol()
34
+
35
+ # add atoms to mol and keep track of index
36
+ node_to_idx = {}
37
+ for i in range(len(node_list)):
38
+ if node_list[i] == -1:
39
+ continue
40
+ a = Chem.Atom(atom_decoder[int(node_list[i])])
41
+ molIdx = mol.AddAtom(a)
42
+ node_to_idx[i] = molIdx
43
+
44
+ for ix, row in enumerate(adjacency_matrix):
45
+ for iy, bond in enumerate(row):
46
+ # only traverse half the symmetric matrix
47
+ if iy <= ix:
48
+ continue
49
+ if bond == 1:
50
+ bond_type = Chem.rdchem.BondType.SINGLE
51
+ elif bond == 2:
52
+ bond_type = Chem.rdchem.BondType.DOUBLE
53
+ elif bond == 3:
54
+ bond_type = Chem.rdchem.BondType.TRIPLE
55
+ elif bond == 4:
56
+ bond_type = Chem.rdchem.BondType.AROMATIC
57
+ else:
58
+ continue
59
+ mol.AddBond(node_to_idx[ix], node_to_idx[iy], bond_type)
60
+
61
+ try:
62
+ mol = mol.GetMol()
63
+ except rdkit.Chem.KekulizeException:
64
+ print("Can't kekulize molecule")
65
+ mol = None
66
+ return mol
67
+
68
+ def visualize(self, path: str, molecules: list, num_molecules_to_visualize: int, log='graph'):
69
+ # define path to save figures
70
+ if not os.path.exists(path):
71
+ os.makedirs(path)
72
+
73
+ # visualize the final molecules
74
+ print(f"Visualizing {num_molecules_to_visualize} of {len(molecules)}")
75
+ if num_molecules_to_visualize > len(molecules):
76
+ print(f"Shortening to {len(molecules)}")
77
+ num_molecules_to_visualize = len(molecules)
78
+
79
+ for i in range(num_molecules_to_visualize):
80
+ file_path = os.path.join(path, 'molecule_{}.png'.format(i))
81
+ mol = self.mol_from_graphs(molecules[i][0].numpy(), molecules[i][1].numpy())
82
+ try:
83
+ Draw.MolToFile(mol, file_path)
84
+ if wandb.run and log is not None:
85
+ print(f"Saving {file_path} to wandb")
86
+ wandb.log({log: wandb.Image(file_path)}, commit=True)
87
+ except rdkit.Chem.KekulizeException:
88
+ print("Can't kekulize molecule")
89
+
90
+
91
+ def visualize_chain(self, path, nodes_list, adjacency_matrix, trainer=None):
92
+ RDLogger.DisableLog('rdApp.*')
93
+ # convert graphs to the rdkit molecules
94
+ mols = [self.mol_from_graphs(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])]
95
+
96
+ # find the coordinates of atoms in the final molecule
97
+ final_molecule = mols[-1]
98
+ AllChem.Compute2DCoords(final_molecule)
99
+
100
+ coords = []
101
+ for i, atom in enumerate(final_molecule.GetAtoms()):
102
+ positions = final_molecule.GetConformer().GetAtomPosition(i)
103
+ coords.append((positions.x, positions.y, positions.z))
104
+
105
+ # align all the molecules
106
+ for i, mol in enumerate(mols):
107
+ AllChem.Compute2DCoords(mol)
108
+ conf = mol.GetConformer()
109
+ for j, atom in enumerate(mol.GetAtoms()):
110
+ x, y, z = coords[j]
111
+ conf.SetAtomPosition(j, Point3D(x, y, z))
112
+
113
+ # draw gif
114
+ save_paths = []
115
+ num_frams = nodes_list.shape[0]
116
+
117
+ for frame in range(num_frams):
118
+ file_name = os.path.join(path, 'fram_{}.png'.format(frame))
119
+ Draw.MolToFile(mols[frame], file_name, size=(300, 300), legend=f"Frame {frame}")
120
+ save_paths.append(file_name)
121
+
122
+ imgs = [imageio.imread(fn) for fn in save_paths]
123
+ gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1]))
124
+ imgs.extend([imgs[-1]] * 10)
125
+ imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20)
126
+
127
+ if wandb.run:
128
+ print(f"Saving {gif_path} to wandb")
129
+ wandb.log({"chain": wandb.Video(gif_path, fps=5, format="gif")}, commit=True)
130
+
131
+ # draw grid image
132
+ try:
133
+ img = Draw.MolsToGridImage(mols, molsPerRow=10, subImgSize=(200, 200))
134
+ img.save(os.path.join(path, '{}_grid_image.png'.format(path.split('/')[-1])))
135
+ except Chem.rdchem.KekulizeException:
136
+ print("Can't kekulize molecule")
137
+ return mols
138
+
139
+
140
+ class NonMolecularVisualization:
141
+ def to_networkx(self, node_list, adjacency_matrix):
142
+ """
143
+ Convert a graph to a networkx graph
144
+ node_list: the node labels of the graph (n)
145
+ adjacency_matrix: the adjacency matrix of the graph (n x n)
146
+ """
147
+ graph = nx.Graph()
148
+
149
+ for i in range(len(node_list)):
150
+ if node_list[i] == -1:
151
+ continue
152
+ graph.add_node(i, number=i, symbol=node_list[i], color_val=node_list[i])
153
+
154
+ rows, cols = np.where(adjacency_matrix >= 1)
155
+ edges = zip(rows.tolist(), cols.tolist())
156
+ for edge in edges:
157
+ edge_type = adjacency_matrix[edge[0]][edge[1]]
158
+ graph.add_edge(edge[0], edge[1], color=float(edge_type), weight=3 * edge_type)
159
+
160
+ return graph
161
+
162
+ def visualize_non_molecule(self, graph, pos, path, iterations=100, node_size=100, largest_component=False):
163
+ if largest_component:
164
+ CGs = [graph.subgraph(c) for c in nx.connected_components(graph)]
165
+ CGs = sorted(CGs, key=lambda x: x.number_of_nodes(), reverse=True)
166
+ graph = CGs[0]
167
+
168
+ # Plot the graph structure with colors
169
+ if pos is None:
170
+ pos = nx.spring_layout(graph, iterations=iterations)
171
+
172
+ # Set node colors based on the eigenvectors
173
+ w, U = np.linalg.eigh(nx.normalized_laplacian_matrix(graph).toarray())
174
+ vmin, vmax = np.min(U[:, 1]), np.max(U[:, 1])
175
+ m = max(np.abs(vmin), vmax)
176
+ vmin, vmax = -m, m
177
+
178
+ plt.figure()
179
+ nx.draw(graph, pos, font_size=5, node_size=node_size, with_labels=False, node_color=U[:, 1],
180
+ cmap=plt.cm.coolwarm, vmin=vmin, vmax=vmax, edge_color='grey')
181
+
182
+ plt.tight_layout()
183
+ plt.savefig(path)
184
+ plt.close("all")
185
+
186
+ def visualize(self, path: str, graphs: list, num_graphs_to_visualize: int, log='graph'):
187
+ # define path to save figures
188
+ if not os.path.exists(path):
189
+ os.makedirs(path)
190
+
191
+ # visualize the final molecules
192
+ for i in range(num_graphs_to_visualize):
193
+ file_path = os.path.join(path, 'graph_{}.png'.format(i))
194
+ graph = self.to_networkx(graphs[i][0].numpy(), graphs[i][1].numpy())
195
+ self.visualize_non_molecule(graph=graph, pos=None, path=file_path)
196
+ im = plt.imread(file_path)
197
+ if wandb.run and log is not None:
198
+ wandb.log({log: [wandb.Image(im, caption=file_path)]})
199
+
200
+ def visualize_chain(self, path, nodes_list, adjacency_matrix):
201
+ # convert graphs to networkx
202
+ graphs = [self.to_networkx(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])]
203
+ # find the coordinates of atoms in the final molecule
204
+ final_graph = graphs[-1]
205
+ final_pos = nx.spring_layout(final_graph, seed=0)
206
+
207
+ # draw gif
208
+ save_paths = []
209
+ num_frams = nodes_list.shape[0]
210
+
211
+ for frame in range(num_frams):
212
+ file_name = os.path.join(path, 'fram_{}.png'.format(frame))
213
+ self.visualize_non_molecule(graph=graphs[frame], pos=final_pos, path=file_name)
214
+ save_paths.append(file_name)
215
+
216
+ imgs = [imageio.imread(fn) for fn in save_paths]
217
+ gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1]))
218
+ imgs.extend([imgs[-1]] * 10)
219
+ imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20)
220
+ if wandb.run:
221
+ wandb.log({'chain': [wandb.Video(gif_path, caption=gif_path, format="gif")]})
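For readers unfamiliar with the integer encoding used by mol_from_graphs above, here is a minimal standalone sketch of the same node/bond-order convention (a hypothetical two-atom example, assuming RDKit is installed; not part of the uploaded code):

from rdkit import Chem

# hypothetical example: integers index into an atom_decoder list, adjacency values are bond orders
atom_decoder = ['C', 'N', 'O']
node_list = [0, 2]                                           # C, O
mol = Chem.RWMol()
idx = [mol.AddAtom(Chem.Atom(atom_decoder[a])) for a in node_list]
mol.AddBond(idx[0], idx[1], Chem.rdchem.BondType.DOUBLE)    # adjacency value 2 -> double bond
print(Chem.MolToSmiles(mol.GetMol()))                        # C=O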
app.py ADDED
@@ -0,0 +1,89 @@
1
+ from omegaconf import OmegaConf
2
+ import gradio as gr
3
+
4
+ from dataset import init_dataset, compute_input_output_dims
5
+ from extra_features import ExtraFeatures
6
+ from demo_model import LGGMText2Graph_Demo
7
+ from analysis.spectre_utils import CrossDomainSamplingMetrics
8
+ import networkx as nx
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+
12
+
13
+ cfg = OmegaConf.load('./config.yaml')
14
+ hydra_path = '.'
15
+
16
+
17
+ data_loaders, num_classes, max_n_nodes, nodes_dist, edge_types, node_types, n_nodes, cond_dims, cond_emb = init_dataset(cfg.dataset.name, cfg.train.batch_size, hydra_path, cfg.general.condition, cfg.model.transition)
18
+
19
+ extra_features = ExtraFeatures(cfg.model.extra_features, max_n_nodes)
20
+
21
+ input_dims, output_dims = compute_input_output_dims(data_loaders['train'], extra_features)
22
+
23
+ sampling_metrics = CrossDomainSamplingMetrics(data_loaders)
24
+
25
+ model = LGGMText2Graph_Demo.load_from_checkpoint('last-v1.ckpt')
26
+
27
+ model.init_prompt_encoder()
28
+
29
+ def calculate_average_degree(graph):
30
+ num_nodes = graph.number_of_nodes()
31
+ num_edges = graph.number_of_edges()
32
+ return (2 * num_edges) / num_nodes if num_nodes > 0 else 0
33
+
34
+
35
+ def predict(text, num_nodes = None):
36
+ # Assuming model.generate and other processes are defined as before
37
+ graphs = model.generate(text, int(num_nodes))
38
+ ccs = []
39
+ degs = []
40
+ images = []
41
+
42
+ for g in graphs:
43
+ ccs.append(nx.average_clustering(g))
44
+ degs.append(calculate_average_degree(g))
45
+
46
+ fig, ax = plt.subplots()
47
+ nx.draw(g, ax=ax)
48
+ fig.canvas.draw()
49
+ image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
50
+ image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
51
+ plt.close(fig)
52
+
53
+ images.append(image)
54
+
55
+ return images[0], images[1], images[2], images[3], images[4], ccs[0], ccs[1], ccs[2], ccs[3], ccs[4], degs[0], degs[1], degs[2], degs[3], degs[4]
56
+
57
+ def clear(input_text):
58
+ return [None] * 15  # one None per output component (5 images + 5 CC boxes + 5 DEG boxes)
59
+
60
+
61
+ with gr.Blocks() as demo:
62
+ gr.Markdown("## Text2Graph Generation Demo")
63
+ with gr.Row():
64
+ with gr.Column():
65
+ input_text = gr.Textbox(label="Input your text prompt here", placeholder="Type here...")
66
+ with gr.Column():
67
+ input_num = gr.Slider(5, 200, value=10, label="Count", info="Number of nodes in the graph to be generated")
68
+ with gr.Column():
69
+ gr.Markdown("### Suggested Prompts")
70
+ gr.Markdown("1. Create a complex network with high clustering coefficient.\n2. Create a graph with extremely low number of triangles.")
71
+
72
+ with gr.Row() as output_row:
73
+ output_images = [gr.Image(label = f"Generated Network #{_}") for _ in range(5)]
74
+ with gr.Row():
75
+ output_texts_cc = [gr.Textbox(label=f"CC #{_}") for _ in range(5)]
76
+ with gr.Row():
77
+ output_texts_deg = [gr.Textbox(label=f"DEG #{_}") for _ in range(5)]
78
+
79
+ with gr.Row():
80
+ submit_button = gr.Button("Submit")
81
+ clear_button = gr.Button("Clear")
82
+
83
+ # Change function is linked to the submit button
84
+ submit_button.click(fn=predict, inputs=[input_text, input_num], outputs=output_images + output_texts_cc + output_texts_deg)
85
+
86
+ # Clear function resets the text input and clears the outputs
87
+ clear_button.click(fn=clear, inputs=input_text, outputs=output_images + output_texts_cc + output_texts_deg)
88
+
89
+ demo.launch()
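The CC and DEG boxes report the average clustering coefficient and average degree of each generated graph. A small self-contained sketch of those statistics on a stand-in networkx graph (a random graph used purely for illustration, not a model sample):

import networkx as nx

g = nx.erdos_renyi_graph(n=10, p=0.3, seed=0)   # stand-in for a generated sample
cc = nx.average_clustering(g)
deg = 2 * g.number_of_edges() / g.number_of_nodes()
print(f"CC={cc:.3f}, DEG={deg:.3f}")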
config.yaml ADDED
@@ -0,0 +1,53 @@
1
+ hydra:
2
+ job:
3
+ chdir: True
4
+ run:
5
+ dir: ../outputs/${general.name}
6
+ general:
7
+ name: 'cc_high'
8
+ wandb: 'online' # online | offline | disabled
9
+ gpus: 1
10
+ resume: null
11
+ test_only: null
12
+ sample_every_val: 4
13
+ check_val_every_n_epochs: 10
14
+ samples_to_generate: 100
15
+ samples_to_save: 3
16
+ chains_to_save: 1
17
+ log_every_steps: 50
18
+ number_chain_steps: 8
19
+ final_model_samples_to_generate: 100
20
+ final_model_samples_to_save: 30
21
+ final_model_chains_to_save: 20
22
+ condition: ' '
23
+ setting: 'train_scratch'
24
+ ckpt_path: null
25
+ model:
26
+ type: 'discrete'
27
+ transition: 'marginal'
28
+ model: 'graph_tf'
29
+ diffusion_steps: 500
30
+ diffusion_noise_schedule: 'cosine'
31
+ n_layers: 5
32
+ extra_features: 'all'
33
+ hidden_mlp_dims: {'X': 256, 'E': 128, 'y': 128}
34
+ hidden_dims: {'dx': 256, 'de': 64, 'dy': 64, 'n_head': 8, 'dim_ffX': 256, 'dim_ffE': 128, 'dim_ffy': 128}
35
+ lambda_train: [5, 0]
36
+ train:
37
+ n_epochs: 300
38
+ batch_size: 8
39
+ accumulate_grad_batches: 1
40
+ lr: 0.0002
41
+ clip_grad: null
42
+ save_model: True
43
+ num_workers: 0
44
+ ema_decay: 0
45
+ weight_decay: 1e-12
46
+ seed: 0
47
+ progress_bar: false
48
+ optimizer: adamw
49
+ dataset:
50
+ datadir: 'graph/'
51
+ name: cc_high
52
+ remove_h: null
53
+ sample: 'seed'
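A minimal sketch of reading this file with OmegaConf, matching the way app.py loads it (the override at the end is only an illustration, not a setting used by the upload):

from omegaconf import OmegaConf

cfg = OmegaConf.load('./config.yaml')
print(cfg.model.diffusion_steps)      # 500
print(cfg.model.hidden_dims.dx)       # 256

# hypothetical override, merged the same way Hydra would apply a dotlist
cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(["train.batch_size=16"]))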
dataset.py ADDED
@@ -0,0 +1,395 @@
1
+ import torch
2
+ from distributions import DistributionNodes
3
+ from utils import to_dense
4
+ from torch_geometric.loader import DataLoader
5
+ from torch_geometric.data import Data
6
+ from torch_geometric.utils import remove_self_loops, to_undirected
7
+ import os
8
+ from sentence_transformers import SentenceTransformer
9
+ import random
10
+
11
+
12
+ def arrange_data(adj_matrix, cond_emb, ind):
13
+ n_nodes = adj_matrix.shape[0]
14
+
15
+ edge_index = adj_matrix.nonzero().t()
16
+ edge_attr = torch.tensor([[0, 1] for _ in range(edge_index.shape[1])])
17
+
18
+ edge_index, edge_attr = to_undirected(edge_index, edge_attr, n_nodes, reduce = 'mean')
19
+ edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
20
+
21
+ x = torch.ones((n_nodes, 1))
22
+
23
+ y = torch.empty(1, 0)
24
+ cond_emb = torch.tensor(cond_emb).unsqueeze(0)
25
+
26
+ return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, prompt_id = torch.tensor(ind), cond_emb = cond_emb)
27
+
28
+
29
+
30
+ def load_dataset_cc(dataname, batch_size, hydra_path, condition):
31
+ domains = ['cc_high', 'cc_medium', 'cc_low']
32
+
33
+
34
+ model = SentenceTransformer("all-MiniLM-L6-v2")
35
+ cond_embs = model.encode(condition)
36
+
37
+ for domain in domains:
38
+ if not os.path.exists(f'{hydra_path}/graphs/{domain}/train.pt'):
39
+
40
+ data = torch.load(f'{hydra_path}/graphs/{domain}/{domain}.pt')
41
+
42
+ #fix seed
43
+ torch.manual_seed(0)
44
+
45
+ #random permute and split
46
+ n = len(data)
47
+ indices = torch.randperm(n)
48
+
49
+ if domain == 'eco':
50
+ train_indices = indices[:4].repeat(50)
51
+ val_indices = indices[4:5].repeat(50)
52
+ test_indices = indices[5:]
53
+ else:
54
+ train_indices = indices[:int(0.7 * n)]
55
+ val_indices = indices[int(0.7 * n):int(0.8 * n)]
56
+ test_indices = indices[int(0.8 * n):]
57
+
58
+ train_data = [data[_] for _ in train_indices]
59
+ val_data = [data[_] for _ in val_indices]
60
+ test_data = [data[_] for _ in test_indices]
61
+
62
+ torch.save(train_indices, f'{hydra_path}/graphs/{domain}/train_indices.pt')
63
+ torch.save(val_indices, f'{hydra_path}/graphs/{domain}/val_indices.pt')
64
+ torch.save(test_indices, f'{hydra_path}/graphs/{domain}/test_indices.pt')
65
+
66
+ torch.save(train_data, f'{hydra_path}/graphs/{domain}/train.pt')
67
+ torch.save(val_data, f'{hydra_path}/graphs/{domain}/val.pt')
68
+ torch.save(test_data, f'{hydra_path}/graphs/{domain}/test.pt')
69
+
70
+
71
+ train_data, val_data, test_data = [], [], []
72
+
73
+ if dataname in domains: #only for test
74
+ train_d = torch.load(f'{hydra_path}/graphs/{dataname}/train.pt')
75
+ val_d = torch.load(f'{hydra_path}/graphs/{dataname}/val.pt')
76
+ test_d = torch.load(f'{hydra_path}/graphs/{dataname}/test.pt')
77
+
78
+ train_indices = torch.load(f'{hydra_path}/graphs/{dataname}/train_indices.pt')
79
+ val_indices = torch.load(f'{hydra_path}/graphs/{dataname}/val_indices.pt')
80
+ test_indices = torch.load(f'{hydra_path}/graphs/{dataname}/test_indices.pt')
81
+
82
+ with open(f'{hydra_path}/graphs/{dataname}/text_prompt_order.txt', 'r') as f:
83
+ text_prompt = f.readlines()
84
+ text_prompt = [x.strip() for x in text_prompt]
85
+
86
+ # text_prompt = ['1111111shgowhgo234o234']*10000
87
+ print(text_prompt[0])
88
+ text_embs = model.encode(text_prompt)
89
+ cond_embs = torch.tensor(text_embs)
90
+
91
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
92
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
93
+
94
+
95
+ if dataname != 'eco':
96
+ # test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
97
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
98
+ else:
99
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
100
+
101
+
102
+ elif dataname == 'all':
103
+ for i, domain in enumerate(domains):
104
+ train_d = torch.load(f'{hydra_path}/graphs/{domain}/train.pt')
105
+ val_d = torch.load(f'{hydra_path}/graphs/{domain}/val.pt')
106
+ test_d = torch.load(f'{hydra_path}/graphs/{domain}/test.pt')
107
+
108
+ train_indices = torch.load(f'{hydra_path}/graphs/{domain}/train_indices.pt')
109
+ val_indices = torch.load(f'{hydra_path}/graphs/{domain}/val_indices.pt')
110
+ test_indices = torch.load(f'{hydra_path}/graphs/{domain}/test_indices.pt')
111
+
112
+ # text_prompt = torch.load(f'{hydra_path}/graphs/{domain}/text_prompt_order.pt')
113
+
114
+ with open(f'{hydra_path}/graphs/{domain}/text_prompt_order.txt', 'r') as f:
115
+ text_prompt = f.readlines()
116
+ text_prompt = [x.strip() for x in text_prompt]
117
+
118
+ print(domain, text_prompt[0])
119
+
120
+ text_embs = model.encode(text_prompt)
121
+
122
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
123
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
124
+ test_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)])
125
+ print(i, domain, len(train_data), len(val_data), len(test_data))
126
+
127
+ print('Size of dataset', len(train_data), len(val_data), len(test_data))
128
+
129
+ train_loader = DataLoader(train_data, batch_size = batch_size, shuffle=True)
130
+ val_loader = DataLoader(val_data, batch_size = batch_size, shuffle=False)
131
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
132
+
133
+ return train_loader, val_loader, test_loader, train_data, val_data, test_data, text_embs.shape[1], torch.tensor(cond_embs)
134
+
135
+
136
+
137
+
138
+ def load_dataset_deg(dataname, batch_size, hydra_path, condition):
139
+ domains = ['deg_high', 'deg_medium', 'deg_low']
140
+
141
+
142
+ model = SentenceTransformer("all-MiniLM-L6-v2")
143
+ cond_embs = model.encode(condition)
144
+
145
+ for domain in domains:
146
+ if not os.path.exists(f'{hydra_path}/graphs/{domain}/train.pt'):
147
+
148
+ data = torch.load(f'{hydra_path}/graphs/{domain}/{domain}.pt')
149
+
150
+ #fix seed
151
+ torch.manual_seed(0)
152
+
153
+ #random permute and split
154
+ n = len(data)
155
+ indices = torch.randperm(n)
156
+
157
+ if domain == 'eco':
158
+ train_indices = indices[:4].repeat(50)
159
+ val_indices = indices[4:5].repeat(50)
160
+ test_indices = indices[5:]
161
+ else:
162
+ train_indices = indices[:int(0.7 * n)]
163
+ val_indices = indices[int(0.7 * n):int(0.8 * n)]
164
+ test_indices = indices[int(0.8 * n):]
165
+
166
+ train_data = [data[_] for _ in train_indices]
167
+ val_data = [data[_] for _ in val_indices]
168
+ test_data = [data[_] for _ in test_indices]
169
+
170
+ torch.save(train_indices, f'{hydra_path}/graphs/{domain}/train_indices.pt')
171
+ torch.save(val_indices, f'{hydra_path}/graphs/{domain}/val_indices.pt')
172
+ torch.save(test_indices, f'{hydra_path}/graphs/{domain}/test_indices.pt')
173
+
174
+ torch.save(train_data, f'{hydra_path}/graphs/{domain}/train.pt')
175
+ torch.save(val_data, f'{hydra_path}/graphs/{domain}/val.pt')
176
+ torch.save(test_data, f'{hydra_path}/graphs/{domain}/test.pt')
177
+
178
+
179
+ train_data, val_data, test_data = [], [], []
180
+
181
+ if dataname in domains: #only for test
182
+ train_d = torch.load(f'{hydra_path}/graphs/{dataname}/train.pt')
183
+ val_d = torch.load(f'{hydra_path}/graphs/{dataname}/val.pt')
184
+ test_d = torch.load(f'{hydra_path}/graphs/{dataname}/test.pt')
185
+
186
+ train_indices = torch.load(f'{hydra_path}/graphs/{dataname}/train_indices.pt')
187
+ val_indices = torch.load(f'{hydra_path}/graphs/{dataname}/val_indices.pt')
188
+ test_indices = torch.load(f'{hydra_path}/graphs/{dataname}/test_indices.pt')
189
+
190
+ with open(f'{hydra_path}/graphs/{dataname}/text_prompt_order.txt', 'r') as f:
191
+ text_prompt = f.readlines()
192
+ text_prompt = [x.strip() for x in text_prompt]
193
+
194
+
195
+ text_embs = model.encode(text_prompt)
196
+ cond_embs = torch.tensor(text_embs)
197
+
198
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
199
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
200
+
201
+
202
+ if dataname != 'eco':
203
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
204
+ else:
205
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
206
+
207
+
208
+ elif dataname == 'all':
209
+ for i, domain in enumerate(domains):
210
+ train_d = torch.load(f'{hydra_path}/graphs/{domain}/train.pt')
211
+ val_d = torch.load(f'{hydra_path}/graphs/{domain}/val.pt')
212
+ test_d = torch.load(f'{hydra_path}/graphs/{domain}/test.pt')
213
+
214
+ train_indices = torch.load(f'{hydra_path}/graphs/{domain}/train_indices.pt')
215
+ val_indices = torch.load(f'{hydra_path}/graphs/{domain}/val_indices.pt')
216
+ test_indices = torch.load(f'{hydra_path}/graphs/{domain}/test_indices.pt')
217
+
218
+ # text_prompt = torch.load(f'{hydra_path}/graphs/{domain}/text_prompt_order.pt')
219
+
220
+ with open(f'{hydra_path}/graphs/{domain}/text_prompt_order.txt', 'r') as f:
221
+ text_prompt = f.readlines()
222
+ text_prompt = [x.strip() for x in text_prompt]
223
+
224
+ print(domain, text_prompt[0])
225
+
226
+ text_embs = model.encode(text_prompt)
227
+
228
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
229
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
230
+ test_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)])
231
+ print(i, domain, len(train_data), len(val_data), len(test_data))
232
+
233
+ print('Size of dataset', len(train_data), len(val_data), len(test_data))
234
+
235
+ train_loader = DataLoader(train_data, batch_size = batch_size, shuffle=True)
236
+ val_loader = DataLoader(val_data, batch_size = batch_size, shuffle=False)
237
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
238
+
239
+ return train_loader, val_loader, test_loader, train_data, val_data, test_data, text_embs.shape[1], torch.tensor(cond_embs)
240
+
241
+
242
+
243
+
244
+ def init_dataset(dataname, batch_size, hydra_path, condition, transition):
245
+ train_loader, val_loader, test_loader, train_data, val_data, test_data, cond_dims, cond_emb = load_dataset_cc(dataname, batch_size, hydra_path, condition)
246
+
247
+ n_nodes = node_counts(1000, train_loader, val_loader)
248
+ node_types = torch.tensor([1]) #No node types
249
+ edge_types = edge_counts(train_loader)
250
+
251
+ num_classes = len(node_types)
252
+ max_n_nodes = len(n_nodes) - 1
253
+ nodes_dist = DistributionNodes(n_nodes)
254
+
255
+ print('Distribution of Number of Nodes:', n_nodes)
256
+ print('Distribution of Node Types:', node_types)
257
+ print('Distribution of Edge Types:', edge_types)
258
+
259
+ data_loaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
260
+
261
+ return data_loaders, num_classes, max_n_nodes, nodes_dist, edge_types, node_types, n_nodes, cond_dims, cond_emb
262
+
263
+
264
+ def node_counts(max_nodes_possible, train_loader, val_loader):
265
+ #Count the distribution of graph size
266
+ all_counts = torch.zeros(max_nodes_possible)
267
+
268
+ for loader in [train_loader, val_loader]:
269
+ for data in loader:
270
+ unique, counts = torch.unique(data.batch, return_counts=True)
271
+ for count in counts:
272
+ all_counts[count] += 1
273
+
274
+ max_index = max(all_counts.nonzero())
275
+ all_counts = all_counts[:max_index + 1]
276
+ all_counts = all_counts / all_counts.sum()
277
+
278
+ return all_counts
279
+
280
+ def node_counts_meta(max_nodes_possible, train_data, val_data, num_classes):
281
+ #Count the distribution of graph size
282
+
283
+ all_counts = [torch.zeros(max_nodes_possible) for _ in range(num_classes)]
284
+
285
+ for dataset in [train_data, val_data]:
286
+ for data in dataset:
287
+ all_counts[data.cond_type.item()][data.x.shape[0]] += 1
288
+
289
+ for _ in range(num_classes):
290
+ tmp = all_counts[_].nonzero()
291
+ if len(tmp) == 0:
292
+ max_index = 1
293
+ all_counts[_][0] = 1
294
+ else:
295
+ max_index = max(tmp)
296
+
297
+ all_counts[_] = all_counts[_][:max_index + 1]
298
+ all_counts[_] = all_counts[_] / all_counts[_].sum()
299
+
300
+ return all_counts
301
+
302
+
303
+ def node_types(train_loader):
304
+ #Count the marginal distribution of node types
305
+ num_classes = None
306
+ for data in train_loader:
307
+ num_classes = data.x.shape[1]
308
+ break
309
+
310
+ counts = torch.zeros(num_classes)
311
+
312
+ for i, data in enumerate(train_loader):
313
+ counts += data.x.sum(dim=0)
314
+
315
+ counts = counts / counts.sum()
316
+ return counts
317
+
318
+ def edge_counts(train_loader):
319
+ #Count the marginal distribution of edge types
320
+ num_classes = None
321
+ for data in train_loader:
322
+ num_classes = data.edge_attr.shape[1]
323
+ break
324
+
325
+ d = torch.zeros(num_classes, dtype=torch.float)
326
+
327
+ for i, data in enumerate(train_loader):
328
+ unique, counts = torch.unique(data.batch, return_counts=True)
329
+
330
+ all_pairs = 0
331
+ for count in counts:
332
+ all_pairs += count * (count - 1)
333
+
334
+
335
+ num_edges = data.edge_index.shape[1]
336
+ num_non_edges = all_pairs - num_edges
337
+
338
+ edge_types = data.edge_attr.sum(dim=0)
339
+ assert num_non_edges >= 0
340
+ d[0] += num_non_edges
341
+ d[1:] += edge_types[1:]
342
+
343
+ d = d / d.sum()
344
+ return d
345
+
346
+
347
+ def edge_counts_meta(train_data, num_classes):
348
+ #Count the marginal distribution of edge types
349
+ num_edge_classes = None
350
+ for data in train_data:
351
+ num_edge_classes = data.edge_attr.shape[1]
352
+ break
353
+
354
+ d = [torch.ones(num_edge_classes, dtype=torch.float) for _ in range(num_classes)]
355
+
356
+ for i, data in enumerate(train_data):
357
+ n_nodes = data.x.shape[0]
358
+
359
+ all_pairs = n_nodes * (n_nodes - 1)
360
+ num_edges = data.edge_index.shape[1]
361
+ num_non_edges = all_pairs - num_edges
362
+
363
+ edge_types = data.edge_attr.sum(dim=0)
364
+ assert num_non_edges >= 0
365
+ d[data.cond_type.item()][0] += num_non_edges
366
+ d[data.cond_type.item()][1:] += edge_types[1:]
367
+
368
+ for i, _ in enumerate(d):
369
+ d[i] = d[i] / d[i].sum()
370
+
371
+ d = torch.stack(d)
372
+
373
+ return d
374
+
375
+
376
+ def compute_input_output_dims(train_loader, extra_features):
377
+ example_batch = next(iter(train_loader))
378
+ ex_dense, node_mask = to_dense(example_batch.x, example_batch.edge_index, example_batch.edge_attr, example_batch.batch)
379
+
380
+ example_data = {'X_t': ex_dense.X, 'E_t': ex_dense.E, 'y_t': example_batch['y'], 'node_mask': node_mask}
381
+
382
+ input_dims = {'X': example_batch['x'].size(1),
383
+ 'E': example_batch['edge_attr'].size(1),
384
+ 'y': example_batch['y'].size(1) + 1} # + 1 due to time conditioning
385
+
386
+ ex_extra_feat = extra_features(example_data)
387
+ input_dims['X'] += ex_extra_feat.X.size(-1)
388
+ input_dims['E'] += ex_extra_feat.E.size(-1)
389
+ input_dims['y'] += ex_extra_feat.y.size(-1)
390
+
391
+ output_dims = {'X': example_batch['x'].size(1),
392
+ 'E': example_batch['edge_attr'].size(1),
393
+ 'y': 0}
394
+
395
+ return input_dims, output_dims
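As a worked example of the edge marginals computed by edge_counts above (illustrative numbers only, not taken from the dataset): a graph with n nodes has n*(n-1) ordered node pairs; pairs without an edge contribute to class 0 and the remaining mass goes to the edge classes.

import torch

# hypothetical graph: 10 nodes, 18 undirected edges stored in both directions
n_nodes, num_directed_edges = 10, 36
all_pairs = n_nodes * (n_nodes - 1)            # 90 ordered pairs
num_non_edges = all_pairs - num_directed_edges

d = torch.tensor([num_non_edges, num_directed_edges], dtype=torch.float)
print(d / d.sum())                             # tensor([0.6000, 0.4000])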
demo_model.py ADDED
@@ -0,0 +1,214 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from tqdm import tqdm
5
+
6
+ from models.transformer_model import GraphTransformer
7
+ from diffusion.noise_schedule import DiscreteUniformTransition, PredefinedNoiseScheduleDiscrete
8
+ from diffusion import diffusion_utils
9
+ import utils
10
+ import networkx as nx
11
+ from sentence_transformers import SentenceTransformer
12
+ import pytorch_lightning as pl
13
+
14
+
15
+ class LGGMText2Graph_Demo(pl.LightningModule):
16
+ def __init__(self, cfg, input_dims, output_dims, cond_dims, cond_emb, \
17
+ nodes_dist, node_types, edge_types, extra_features, data_loaders):
18
+ super().__init__()
19
+
20
+ nodes_dist = nodes_dist
21
+
22
+ self.cfg = cfg
23
+ self.T = cfg.model.diffusion_steps
24
+
25
+ self.Xdim = input_dims['X']
26
+ self.Edim = input_dims['E']
27
+ self.ydim = input_dims['y']
28
+ self.Xdim_output = output_dims['X']
29
+ self.Edim_output = output_dims['E']
30
+ self.ydim_output = output_dims['y']
31
+ self.node_dist = nodes_dist
32
+
33
+
34
+ self.extra_features = extra_features
35
+
36
+ self.model = GraphTransformer(n_layers=cfg.model.n_layers,
37
+ input_dims=input_dims,
38
+ hidden_mlp_dims=cfg.model.hidden_mlp_dims,
39
+ hidden_dims=cfg.model.hidden_dims,
40
+ output_dims=output_dims,
41
+ cond_dims = cond_dims,
42
+ act_fn_in=nn.ReLU(),
43
+ act_fn_out=nn.ReLU()).to(self.device)
44
+
45
+
46
+ self.noise_schedule = PredefinedNoiseScheduleDiscrete(cfg.model.diffusion_noise_schedule,
47
+ timesteps=cfg.model.diffusion_steps).to(self.device)
48
+
49
+ self.transition_model = DiscreteUniformTransition(x_classes=self.Xdim_output, e_classes=self.Edim_output,
50
+ y_classes=self.ydim_output)
51
+ x_limit = torch.ones(self.Xdim_output) / self.Xdim_output
52
+ e_limit = torch.ones(self.Edim_output) / self.Edim_output
53
+ y_limit = torch.ones(self.ydim_output) / self.ydim_output
54
+
55
+ self.limit_dist = utils.PlaceHolder(X=x_limit, E=e_limit, y=y_limit)
56
+
57
+
58
+ def generate(self, text, num_nodes) -> None:
59
+ print(num_nodes)
60
+ prompt_emb = torch.tensor(self.text_encoder.encode([text])).to(self.device)
61
+ samples = self.sample_batch(5, cond_emb = prompt_emb, num_nodes = num_nodes)
62
+
63
+ nx_graphs = []
64
+ for graph in samples:
65
+ node_types, edge_types = graph
66
+ A = edge_types.bool().cpu().numpy()
67
+
68
+ nx_graph = nx.from_numpy_array(A)
69
+ nx_graphs.append(nx_graph)
70
+
71
+ return nx_graphs
72
+
73
+ def init_prompt_encoder(self):
74
+ self.text_encoder = SentenceTransformer("all-MiniLM-L6-v2")
75
+
76
+
77
+ @torch.no_grad()
78
+ def sample_batch(self, batch_size: int, cond_emb = None, num_nodes = None):
79
+ """
80
+ :param batch_size: int: number of graphs to sample
81
+ :param cond_emb: encoded text-prompt embedding used to condition
82
+ the reverse diffusion (optional)
83
+ :param num_nodes: int, <int>tensor (batch_size) (optional) for
84
+ specifying number of nodes
85
+ :return: graph_list. Each element of this list is a pair
86
+ [node_types, edge_types] describing one sampled graph
87
+ """
88
+ if num_nodes is None:
89
+ n_nodes = self.node_dist.sample_n(batch_size, self.device)
90
+ elif type(num_nodes) == int:
91
+ n_nodes = num_nodes * torch.ones(batch_size, device=self.device, dtype=torch.int)
92
+
93
+ n_max = torch.max(n_nodes).item()
94
+ # Build the masks
95
+ arange = torch.arange(n_max, device=self.device).unsqueeze(0).expand(batch_size, -1)
96
+ node_mask = arange < n_nodes.unsqueeze(1)
97
+ # Sample noise -- z has size (n_samples, n_nodes, n_features)
98
+
99
+ z_T = diffusion_utils.sample_discrete_feature_noise(limit_dist=self.limit_dist, node_mask=node_mask, transition=self.cfg.model.transition)
100
+ X, E, y = z_T.X, z_T.E, z_T.y
101
+
102
+
103
+ # Iteratively sample p(z_s | z_t) for t = 1, ..., T, with s = t - 1.
104
+ for s_int in tqdm(reversed(range(0, self.T))):
105
+ s_array = s_int * torch.ones((batch_size, 1)).type_as(y)
106
+ t_array = s_array + 1
107
+ s_norm = s_array / self.T
108
+ t_norm = t_array / self.T
109
+
110
+ # Sample z_s
111
+ sampled_s = self.sample_p_zs_given_zt(s_norm, t_norm, X, E, y, node_mask, cond_emb)
112
+ X, E, y = sampled_s.X, sampled_s.E, sampled_s.y
113
+
114
+ # Sample
115
+ sampled_s = sampled_s.mask(node_mask, collapse=True)
116
+ X, E, y = sampled_s.X, sampled_s.E, sampled_s.y
117
+
118
+
119
+ graph_list = []
120
+ for i in range(batch_size):
121
+ n = n_nodes[i]
122
+ node_types = X[i, :n].cpu()
123
+ edge_types = E[i, :n, :n].cpu()
124
+ graph_list.append([node_types, edge_types])
125
+
126
+ return graph_list
127
+
128
+ def sample_p_zs_given_zt(self, s, t, X_t, E_t, y_t, node_mask, cond_emb):
129
+ """Samples from zs ~ p(zs | zt). Only used during sampling.
130
+ if last_step, return the graph prediction as well"""
131
+ bs, n, dxs = X_t.shape
132
+ beta_t = self.noise_schedule(t_normalized=t) # (bs, 1)
133
+ alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s)
134
+ alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t)
135
+
136
+
137
+ # Retrieve transitions matrix
138
+ Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, self.device)
139
+ Qsb = self.transition_model.get_Qt_bar(alpha_s_bar, self.device)
140
+ Qt = self.transition_model.get_Qt(beta_t, self.device)
141
+
142
+ noisy_data = {'X_t': X_t, 'E_t': E_t, 'y_t': y_t, 't': t, 'node_mask': node_mask, 'cond_emb': cond_emb.repeat(X_t.shape[0], 1)}
143
+ extra_data = self.compute_extra_data(noisy_data)
144
+ pred = self.forward(noisy_data, extra_data, node_mask)
145
+
146
+ # Normalize predictions
147
+ pred_X = F.softmax(pred.X, dim=-1) # bs, n, d0
148
+ pred_E = F.softmax(pred.E, dim=-1) # bs, n, n, d0
149
+
150
+ p_s_and_t_given_0_X = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=X_t,
151
+ Qt=Qt.X,
152
+ Qsb=Qsb.X,
153
+ Qtb=Qtb.X)
154
+
155
+ p_s_and_t_given_0_E = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=E_t,
156
+ Qt=Qt.E,
157
+ Qsb=Qsb.E,
158
+ Qtb=Qtb.E)
159
+ # Dim of these two tensors: bs, N, d0, d_t-1
160
+ weighted_X = pred_X.unsqueeze(-1) * p_s_and_t_given_0_X # bs, n, d0, d_t-1
161
+ unnormalized_prob_X = weighted_X.sum(dim=2) # bs, n, d_t-1
162
+ unnormalized_prob_X[torch.sum(unnormalized_prob_X, dim=-1) == 0] = 1e-5
163
+ prob_X = unnormalized_prob_X / torch.sum(unnormalized_prob_X, dim=-1, keepdim=True) # bs, n, d_t-1
164
+
165
+ pred_E = pred_E.reshape((bs, -1, pred_E.shape[-1]))
166
+ weighted_E = pred_E.unsqueeze(-1) * p_s_and_t_given_0_E # bs, N, d0, d_t-1
167
+ unnormalized_prob_E = weighted_E.sum(dim=-2)
168
+ unnormalized_prob_E[torch.sum(unnormalized_prob_E, dim=-1) == 0] = 1e-5
169
+ prob_E = unnormalized_prob_E / torch.sum(unnormalized_prob_E, dim=-1, keepdim=True)
170
+ prob_E = prob_E.reshape(bs, n, n, pred_E.shape[-1])
171
+
172
+ assert ((prob_X.sum(dim=-1) - 1).abs() < 1e-4).all()
173
+ assert ((prob_E.sum(dim=-1) - 1).abs() < 1e-4).all()
174
+
175
+
176
+ sampled_s = diffusion_utils.sample_discrete_features(prob_X, prob_E, node_mask=node_mask)
177
+
178
+
179
+ X_s = F.one_hot(sampled_s.X, num_classes=self.Xdim_output).float()
180
+ E_s = F.one_hot(sampled_s.E, num_classes=self.Edim_output).float()
181
+
182
+ assert (E_s == torch.transpose(E_s, 1, 2)).all()
183
+ assert (X_t.shape == X_s.shape) and (E_t.shape == E_s.shape)
184
+
185
+ out_one_hot = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0))
186
+
187
+ return out_one_hot.mask(node_mask).type_as(y_t)
188
+
189
+ def compute_extra_data(self, noisy_data):
190
+ """ At every training step (after adding noise) and step in sampling, compute extra information and append to
191
+ the network input. """
192
+
193
+ extra_features = self.extra_features(noisy_data)
194
+
195
+ # print(extra_features.X.shape, extra_features.E.shape, extra_features.y.shape)
196
+ extra_X = extra_features.X
197
+ extra_E = extra_features.E
198
+ extra_y = extra_features.y
199
+
200
+ t = noisy_data['t']
201
+ extra_y = torch.cat((extra_y, t), dim=1)
202
+
203
+ return utils.PlaceHolder(X=extra_X, E=extra_E, y=extra_y)
204
+
205
+ def forward(self, noisy_data, extra_data, node_mask):
206
+ # print(noisy_data['cond_emb'].sum())
207
+ B = noisy_data['cond_emb'].unsqueeze(1).unsqueeze(2).expand(-1, noisy_data['X_t'].shape[1], noisy_data['X_t'].shape[1], -1).to(self.device)
208
+ A = noisy_data['cond_emb'].unsqueeze(1).expand(-1, noisy_data['X_t'].shape[1], -1).to(self.device)
209
+
210
+ X = torch.cat((noisy_data['X_t'], extra_data.X, A), dim=2).float()
211
+ E = torch.cat((noisy_data['E_t'], extra_data.E, B), dim=3).float()
212
+ y = torch.hstack((noisy_data['y_t'], extra_data.y)).float()
213
+
214
+ return self.model(X, E, y, node_mask)
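For intuition on the weighting in sample_p_zs_given_zt above: the network's prediction over the clean state is combined with the categorical posterior, which is proportional to (z_t @ Qt.T) * (z_0 @ Qsb) and then normalized over classes. A toy two-class sketch of that computation (hypothetical transition matrices, not the model's actual noise schedule):

import torch

Qt  = torch.tensor([[0.9, 0.1], [0.1, 0.9]])    # single-step transition q(z_t | z_s)
Qsb = torch.tensor([[0.8, 0.2], [0.2, 0.8]])    # cumulative q(z_s | z_0)
Qtb = Qsb @ Qt                                  # cumulative q(z_t | z_0)

z_t   = torch.tensor([1.0, 0.0])                # observed noisy state (one-hot)
pred0 = torch.tensor([0.3, 0.7])                # network's softmax over z_0

post_per_x0 = (z_t @ Qt.T).unsqueeze(0) * Qsb              # numerator, one row per value of z_0
post_per_x0 = post_per_x0 / (Qtb @ z_t).unsqueeze(-1)      # divide by q(z_t | z_0)
prob_zs = pred0 @ post_per_x0                              # weight by the prediction over z_0
print(prob_zs / prob_zs.sum())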
diffusion/__init__.py ADDED
File without changes
diffusion/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (166 Bytes). View file
 
diffusion/__pycache__/diffusion_utils.cpython-39.pyc ADDED
Binary file (13 kB). View file
 
diffusion/__pycache__/noise_schedule.cpython-39.pyc ADDED
Binary file (7.85 kB). View file
 
diffusion/diffusion_utils.py ADDED
@@ -0,0 +1,437 @@
1
+ import torch
2
+ from torch.nn import functional as F
3
+ import numpy as np
4
+ import math
5
+ import wandb, omegaconf  # used by setup_wandb below
6
+
7
+
8
+ class PlaceHolder:
9
+ def __init__(self, X, E, y):
10
+ self.X = X
11
+ self.E = E
12
+ self.y = y
13
+
14
+ def type_as(self, x: torch.Tensor):
15
+ """ Changes the device and dtype of X, E, y. """
16
+ self.X = self.X.type_as(x)
17
+ self.E = self.E.type_as(x)
18
+ self.y = self.y.type_as(x)
19
+ return self
20
+
21
+ def mask(self, node_mask, collapse=False):
22
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
23
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
24
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
25
+
26
+ if collapse:
27
+ self.X = torch.argmax(self.X, dim=-1)
28
+ self.E = torch.argmax(self.E, dim=-1)
29
+
30
+ self.X[node_mask == 0] = - 1
31
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
32
+ else:
33
+ self.X = self.X * x_mask
34
+ self.E = self.E * e_mask1 * e_mask2
35
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
36
+ return self
37
+
38
+ def setup_wandb(cfg):
39
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
40
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
41
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
42
+ wandb.init(**kwargs)
43
+ wandb.save('*.txt')
44
+
45
+
46
+ def sum_except_batch(x):
47
+ return x.reshape(x.size(0), -1).sum(dim=-1)
48
+
49
+
50
+ def assert_correctly_masked(variable, node_mask):
51
+ assert (variable * (1 - node_mask.long())).abs().max().item() < 1e-4, \
52
+ 'Variables not masked properly.'
53
+
54
+
55
+ def sample_gaussian(size):
56
+ x = torch.randn(size)
57
+ return x
58
+
59
+
60
+ def sample_gaussian_with_mask(size, node_mask):
61
+ x = torch.randn(size)
62
+ x = x.type_as(node_mask.float())
63
+ x_masked = x * node_mask
64
+ return x_masked
65
+
66
+
67
+ def clip_noise_schedule(alphas2, clip_value=0.001):
68
+ """
69
+ For a noise schedule given by alpha^2, this clips alpha_t / alpha_t-1. This may help improve stability during
70
+ sampling.
71
+ """
72
+ alphas2 = np.concatenate([np.ones(1), alphas2], axis=0)
73
+
74
+ alphas_step = (alphas2[1:] / alphas2[:-1])
75
+
76
+ alphas_step = np.clip(alphas_step, a_min=clip_value, a_max=1.)
77
+ alphas2 = np.cumprod(alphas_step, axis=0)
78
+
79
+ return alphas2
80
+
81
+
82
+ def cosine_beta_schedule(timesteps, s=0.008, raise_to_power: float = 1):
83
+ """
84
+ cosine schedule
85
+ as proposed in https://openreview.net/forum?id=-NEXDKk8gZ
86
+ """
87
+ steps = timesteps + 2
88
+ x = np.linspace(0, steps, steps)
89
+ alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2
90
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
91
+ betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
92
+ betas = np.clip(betas, a_min=0, a_max=0.999)
93
+ alphas = 1. - betas
94
+ alphas_cumprod = np.cumprod(alphas, axis=0)
95
+
96
+ if raise_to_power != 1:
97
+ alphas_cumprod = np.power(alphas_cumprod, raise_to_power)
98
+
99
+ return alphas_cumprod
100
+
101
+
102
+ def cosine_beta_schedule_discrete(timesteps, s=0.008):
103
+ """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. """
104
+ steps = timesteps + 2
105
+ x = np.linspace(0, steps, steps)
106
+
107
+ alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2
108
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
109
+ alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1])
110
+ betas = 1 - alphas
111
+ return betas.squeeze()
112
+
113
+
114
+ def custom_beta_schedule_discrete(timesteps, average_num_nodes=50, s=0.008):
115
+ """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. """
116
+ steps = timesteps + 2
117
+ x = np.linspace(0, steps, steps)
118
+
119
+ alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2
120
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
121
+ alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1])
122
+ betas = 1 - alphas
123
+
124
+ assert timesteps >= 100
125
+
126
+ p = 4 / 5 # 1 - 1 / num_edge_classes
127
+ num_edges = average_num_nodes * (average_num_nodes - 1) / 2
128
+
129
+ # First 100 steps: only a few updates per graph
130
+ updates_per_graph = 1.2
131
+ beta_first = updates_per_graph / (p * num_edges)
132
+
133
+ betas[betas < beta_first] = beta_first
134
+ return np.array(betas)
135
+
136
+
137
+
138
+ def gaussian_KL(q_mu, q_sigma):
139
+ """Computes the KL distance between a normal distribution and the standard normal.
140
+ Args:
141
+ q_mu: Mean of distribution q.
142
+ q_sigma: Standard deviation of distribution q.
143
+ p_mu: Mean of distribution p.
144
+ p_sigma: Standard deviation of distribution p.
145
+ Returns:
146
+ The KL distance, summed over all dimensions except the batch dim.
147
+ """
148
+ return sum_except_batch((torch.log(1 / q_sigma) + 0.5 * (q_sigma ** 2 + q_mu ** 2) - 0.5))
149
+
150
+
151
+ def cdf_std_gaussian(x):
152
+ return 0.5 * (1. + torch.erf(x / math.sqrt(2)))
153
+
154
+
155
+ def SNR(gamma):
156
+ """Computes signal to noise ratio (alpha^2/sigma^2) given gamma."""
157
+ return torch.exp(-gamma)
158
+
159
+
160
+ def inflate_batch_array(array, target_shape):
161
+ """
162
+ Inflates the batch array (array) with only a single axis (i.e. shape = (batch_size,), or possibly more empty
163
+ axes (i.e. shape (batch_size, 1, ..., 1)) to match the target shape.
164
+ """
165
+ target_shape = (array.size(0),) + (1,) * (len(target_shape) - 1)
166
+ return array.view(target_shape)
167
+
168
+
169
+ def sigma(gamma, target_shape):
170
+ """Computes sigma given gamma."""
171
+ return inflate_batch_array(torch.sqrt(torch.sigmoid(gamma)), target_shape)
172
+
173
+
174
+ def alpha(gamma, target_shape):
175
+ """Computes alpha given gamma."""
176
+ return inflate_batch_array(torch.sqrt(torch.sigmoid(-gamma)), target_shape)
177
+
178
+
179
+ def check_mask_correct(variables, node_mask):
180
+ for i, variable in enumerate(variables):
181
+ if len(variable) > 0:
182
+ assert_correctly_masked(variable, node_mask)
183
+
184
+
185
+ def check_tensor_same_size(*args):
186
+ for i, arg in enumerate(args):
187
+ if i == 0:
188
+ continue
189
+ assert args[0].size() == arg.size()
190
+
191
+
192
+ def sigma_and_alpha_t_given_s(gamma_t: torch.Tensor, gamma_s: torch.Tensor, target_size: torch.Size):
193
+ """
194
+ Computes sigma t given s, using gamma_t and gamma_s. Used during sampling.
195
+
196
+ These are defined as:
197
+ alpha t given s = alpha t / alpha s,
198
+ sigma t given s = sqrt(1 - (alpha t given s) ^2 ).
199
+ """
200
+ sigma2_t_given_s = inflate_batch_array(
201
+ -torch.expm1(F.softplus(gamma_s) - F.softplus(gamma_t)), target_size
202
+ )
203
+
204
+ # alpha_t_given_s = alpha_t / alpha_s
205
+ log_alpha2_t = F.logsigmoid(-gamma_t)
206
+ log_alpha2_s = F.logsigmoid(-gamma_s)
207
+ log_alpha2_t_given_s = log_alpha2_t - log_alpha2_s
208
+
209
+ alpha_t_given_s = torch.exp(0.5 * log_alpha2_t_given_s)
210
+ alpha_t_given_s = inflate_batch_array(alpha_t_given_s, target_size)
211
+
212
+ sigma_t_given_s = torch.sqrt(sigma2_t_given_s)
213
+
214
+ return sigma2_t_given_s, sigma_t_given_s, alpha_t_given_s
215
+
216
+
217
+ def reverse_tensor(x):
218
+ return x[torch.arange(x.size(0) - 1, -1, -1)]
219
+
220
+
221
+ def sample_feature_noise(X_size, E_size, y_size, node_mask):
222
+ """Standard normal noise for all features.
223
+ Output size: X.size(), E.size(), y.size() """
224
+ # TODO: How to change this for the multi-gpu case?
225
+ epsX = sample_gaussian(X_size)
226
+ epsE = sample_gaussian(E_size)
227
+ epsy = sample_gaussian(y_size)
228
+
229
+ float_mask = node_mask.float()
230
+ epsX = epsX.type_as(float_mask)
231
+ epsE = epsE.type_as(float_mask)
232
+ epsy = epsy.type_as(float_mask)
233
+
234
+ # Get upper triangular part of edge noise, without main diagonal
235
+ upper_triangular_mask = torch.zeros_like(epsE)
236
+ indices = torch.triu_indices(row=epsE.size(1), col=epsE.size(2), offset=1)
237
+ upper_triangular_mask[:, indices[0], indices[1], :] = 1
238
+
239
+ epsE = epsE * upper_triangular_mask
240
+ epsE = (epsE + torch.transpose(epsE, 1, 2))
241
+
242
+ assert (epsE == torch.transpose(epsE, 1, 2)).all()
243
+
244
+ return PlaceHolder(X=epsX, E=epsE, y=epsy).mask(node_mask)
245
+
246
+
247
+ def sample_normal(mu_X, mu_E, mu_y, sigma, node_mask):
248
+ """Samples from a Normal distribution."""
249
+ # TODO: change for multi-gpu case
250
+ eps = sample_feature_noise(mu_X.size(), mu_E.size(), mu_y.size(), node_mask).type_as(mu_X)
251
+ X = mu_X + sigma * eps.X
252
+ E = mu_E + sigma.unsqueeze(1) * eps.E
253
+ y = mu_y + sigma.squeeze(1) * eps.y
254
+ return PlaceHolder(X=X, E=E, y=y)
255
+
256
+
257
+ def check_issues_norm_values(gamma, norm_val1, norm_val2, num_stdevs=8):
258
+ """ Check if 1 / norm_value is still larger than 10 * standard deviation. """
259
+ zeros = torch.zeros((1, 1))
260
+ gamma_0 = gamma(zeros)
261
+ sigma_0 = sigma(gamma_0, target_shape=zeros.size()).item()
262
+ max_norm_value = max(norm_val1, norm_val2)
263
+ if sigma_0 * num_stdevs > 1. / max_norm_value:
264
+ raise ValueError(
265
+ f'Value for normalization value {max_norm_value} probably too '
266
+ f'large with sigma_0 {sigma_0:.5f} and '
267
+ f'1 / norm_value = {1. / max_norm_value}')
268
+
269
+
270
+ def sample_discrete_features(probX, probE, node_mask):
271
+ ''' Sample features from multinomial distribution with given probabilities (probX, probE, proby)
272
+ :param probX: bs, n, dx_out node features
273
+ :param probE: bs, n, n, de_out edge features
274
+ :param proby: bs, dy_out global features.
275
+ '''
276
+ bs, n, _ = probX.shape
277
+ # Noise X
278
+ # The masked rows should define probability distributions as well
279
+ probX[~node_mask] = 1 / probX.shape[-1]
280
+
281
+ # Flatten the probability tensor to sample with multinomial
282
+ probX = probX.reshape(bs * n, -1) # (bs * n, dx_out)
283
+
284
+ # Sample X
285
+ X_t = probX.multinomial(1) # (bs * n, 1)
286
+ X_t = X_t.reshape(bs, n) # (bs, n)
287
+
288
+ # Noise E
289
+ # The masked rows should define probability distributions as well
290
+ inverse_edge_mask = ~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2))
291
+ diag_mask = torch.eye(n).unsqueeze(0).expand(bs, -1, -1)
292
+
293
+ probE[inverse_edge_mask] = 1 / probE.shape[-1]
294
+ probE[diag_mask.bool()] = 1 / probE.shape[-1]
295
+
296
+ probE = probE.reshape(bs * n * n, -1) # (bs * n * n, de_out)
297
+
298
+ # Sample E
299
+ E_t = probE.multinomial(1).reshape(bs, n, n) # (bs, n, n)
300
+ E_t = torch.triu(E_t, diagonal=1)
301
+ E_t = (E_t + torch.transpose(E_t, 1, 2))
302
+
303
+ return PlaceHolder(X=X_t, E=E_t, y=torch.zeros(bs, 0).type_as(X_t))
304
+
305
+
306
+ def compute_posterior_distribution(M, M_t, Qt_M, Qsb_M, Qtb_M):
307
+ ''' M: X or E
308
+ Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T
309
+ '''
310
+ # Flatten feature tensors
311
+ M = M.flatten(start_dim=1, end_dim=-2).to(torch.float32) # (bs, N, d) with N = n or n * n
312
+ M_t = M_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # same
313
+
314
+ Qt_M_T = torch.transpose(Qt_M, -2, -1) # (bs, d, d)
315
+
316
+ left_term = M_t @ Qt_M_T # (bs, N, d)
317
+ right_term = M @ Qsb_M # (bs, N, d)
318
+ product = left_term * right_term # (bs, N, d)
319
+
320
+ denom = M @ Qtb_M # (bs, N, d) @ (bs, d, d) = (bs, N, d)
321
+ denom = (denom * M_t).sum(dim=-1) # (bs, N, d) * (bs, N, d) + sum = (bs, N)
322
+ # denom = product.sum(dim=-1)
323
+ # denom[denom == 0.] = 1
324
+
325
+ prob = product / denom.unsqueeze(-1) # (bs, N, d)
326
+
327
+ return prob
328
+
329
+
330
+ def compute_batched_over0_posterior_distribution(X_t, Qt, Qsb, Qtb):
331
+ """ M: X or E
332
+ Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T for each possible value of x0
333
+ X_t: bs, n, dt or bs, n, n, dt
334
+ Qt: bs, d_t-1, dt
335
+ Qsb: bs, d0, d_t-1
336
+ Qtb: bs, d0, dt.
337
+ """
338
+ # Flatten feature tensors
339
+ # Careful with this line. It does nothing if X is a node feature. If X is an edge features it maps to
340
+ # bs x (n ** 2) x d
341
+ X_t = X_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # bs x N x dt
342
+
343
+ Qt_T = Qt.transpose(-1, -2) # bs, dt, d_t-1
344
+ left_term = X_t @ Qt_T # bs, N, d_t-1
345
+ left_term = left_term.unsqueeze(dim=2) # bs, N, 1, d_t-1
346
+
347
+ right_term = Qsb.unsqueeze(1) # bs, 1, d0, d_t-1
348
+ numerator = left_term * right_term # bs, N, d0, d_t-1
349
+
350
+ X_t_transposed = X_t.transpose(-1, -2) # bs, dt, N
351
+
352
+ prod = Qtb @ X_t_transposed # bs, d0, N
353
+ prod = prod.transpose(-1, -2) # bs, N, d0
354
+ denominator = prod.unsqueeze(-1) # bs, N, d0, 1
355
+ denominator[denominator == 0] = 1e-6
356
+
357
+ out = numerator / denominator
358
+ return out
359
+
360
+
361
+ def mask_distributions(true_X, true_E, pred_X, pred_E, node_mask):
362
+ """
363
+ Set masked rows to arbitrary distributions, so it doesn't contribute to loss
364
+ :param true_X: bs, n, dx_out
365
+ :param true_E: bs, n, n, de_out
366
+ :param pred_X: bs, n, dx_out
367
+ :param pred_E: bs, n, n, de_out
368
+ :param node_mask: bs, n
369
+ :return: same sizes as input
370
+ """
371
+
372
+ row_X = torch.zeros(true_X.size(-1), dtype=torch.float, device=true_X.device)
373
+ row_X[0] = 1.
374
+ row_E = torch.zeros(true_E.size(-1), dtype=torch.float, device=true_E.device)
375
+ row_E[0] = 1.
376
+
377
+ diag_mask = ~torch.eye(node_mask.size(1), device=node_mask.device, dtype=torch.bool).unsqueeze(0)
378
+ true_X[~node_mask] = row_X
379
+ pred_X[~node_mask] = row_X
380
+ true_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E
381
+ pred_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E
382
+
383
+ true_X = true_X + 1e-7
384
+ pred_X = pred_X + 1e-7
385
+ true_E = true_E + 1e-7
386
+ pred_E = pred_E + 1e-7
387
+
388
+ true_X = true_X / torch.sum(true_X, dim=-1, keepdim=True)
389
+ pred_X = pred_X / torch.sum(pred_X, dim=-1, keepdim=True)
390
+ true_E = true_E / torch.sum(true_E, dim=-1, keepdim=True)
391
+ pred_E = pred_E / torch.sum(pred_E, dim=-1, keepdim=True)
392
+
393
+ return true_X, true_E, pred_X, pred_E
394
+
395
+
396
+ def posterior_distributions(X, E, y, X_t, E_t, y_t, Qt, Qsb, Qtb):
397
+ prob_X = compute_posterior_distribution(M=X, M_t=X_t, Qt_M=Qt.X, Qsb_M=Qsb.X, Qtb_M=Qtb.X) # (bs, n, dx)
398
+ prob_E = compute_posterior_distribution(M=E, M_t=E_t, Qt_M=Qt.E, Qsb_M=Qsb.E, Qtb_M=Qtb.E) # (bs, n * n, de)
399
+
400
+ return PlaceHolder(X=prob_X, E=prob_E, y=y_t)
401
+
402
+
403
+ def sample_discrete_feature_noise(limit_dist, node_mask, transition):
404
+ """ Sample from the limit distribution of the diffusion process"""
405
+ bs, n_max = node_mask.shape
406
+
407
+ x_limit = limit_dist.X[None, None, :].expand(bs, n_max, -1)
408
+ e_limit = limit_dist.E[None, None, None, :].expand(bs, n_max, n_max, -1)
409
+ y_limit = limit_dist.y[None, :].expand(bs, -1)
410
+
411
+ U_X = x_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max)
412
+ U_E = e_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max, n_max)
413
+ # print(U_E.shape, U_X.shape, y_limit.shape)
414
+ U_y = torch.empty((bs, 0))
415
+
416
+ long_mask = node_mask.long()
417
+ U_X = U_X.type_as(long_mask)
418
+ U_E = U_E.type_as(long_mask)
419
+ U_y = U_y.type_as(long_mask)
420
+
421
+ U_X = F.one_hot(U_X, num_classes=x_limit.shape[-1]).float()
422
+ U_E = F.one_hot(U_E, num_classes=e_limit.shape[-1]).float()
423
+
424
+ # Get upper triangular part of edge noise, without main diagonal
425
+ upper_triangular_mask = torch.zeros_like(U_E)
426
+ indices = torch.triu_indices(row=U_E.size(1), col=U_E.size(2), offset=1)
427
+ upper_triangular_mask[:, indices[0], indices[1], :] = 1
428
+
429
+ U_E = U_E * upper_triangular_mask
430
+ U_E = (U_E + torch.transpose(U_E, 1, 2))
431
+
432
+ assert (U_E == torch.transpose(U_E, 1, 2)).all()
433
+
434
+ # print(U_X.shape, limit_dist.cond.shape)
435
+ return PlaceHolder(X=U_X, E=U_E, y=U_y).mask(node_mask)
436
+
437
+
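sample_discrete_features and sample_discrete_feature_noise above both enforce symmetric (undirected) edges by sampling only the strict upper triangle and mirroring it. A minimal standalone sketch of that trick:

import torch

n = 4
E = torch.randint(0, 2, (1, n, n))      # independent draws for every ordered pair
E = torch.triu(E, diagonal=1)           # keep the strict upper triangle only
E = E + E.transpose(1, 2)               # mirror it onto the lower triangle
assert (E == E.transpose(1, 2)).all()   # symmetric adjacency with a zero diagonal
print(E)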
diffusion/distributions.py ADDED
@@ -0,0 +1,32 @@
1
+ import torch
2
+
3
+
4
+ class DistributionNodes:
5
+ def __init__(self, histogram):
6
+ """ Compute the distribution of the number of nodes in the dataset, and sample from this distribution.
7
+ histogram: dict. The keys are num_nodes, the values are counts
8
+ """
9
+
10
+ if type(histogram) == dict:
11
+ max_n_nodes = max(histogram.keys())
12
+ prob = torch.zeros(max_n_nodes + 1)
13
+ for num_nodes, count in histogram.items():
14
+ prob[num_nodes] = count
15
+ else:
16
+ prob = histogram
17
+
18
+ self.prob = prob / prob.sum()
19
+ self.m = torch.distributions.Categorical(prob)
20
+
21
+ def sample_n(self, n_samples, device):
22
+ idx = self.m.sample((n_samples,))
23
+ return idx.to(device)
24
+
25
+ def log_prob(self, batch_n_nodes):
26
+ assert len(batch_n_nodes.size()) == 1
27
+ p = self.prob.to(batch_n_nodes.device)
28
+
29
+ probas = p[batch_n_nodes]
30
+ log_p = torch.log(probas + 1e-30)
31
+ return log_p
32
+
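A minimal usage sketch of the class above with a hypothetical node-count histogram (assuming the upload root is on PYTHONPATH so that diffusion.distributions is importable):

import torch
from diffusion.distributions import DistributionNodes

histogram = {10: 30, 12: 20, 15: 10}             # hypothetical counts of graph sizes
dist = DistributionNodes(histogram)

n_nodes = dist.sample_n(5, device='cpu')         # e.g. tensor([10, 12, 10, 15, 12])
print(n_nodes, dist.log_prob(n_nodes))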
diffusion/extra_features.py ADDED
@@ -0,0 +1,275 @@
1
+ import torch
2
+ from diffusion import utils  # the upload has no src package; diffusion/utils.py provides PlaceHolder
3
+
4
+
5
+ class DummyExtraFeatures:
6
+ def __init__(self):
7
+ """ This class does not compute anything, just returns empty tensors."""
8
+
9
+ def __call__(self, noisy_data):
10
+ X = noisy_data['X_t']
11
+ E = noisy_data['E_t']
12
+ y = noisy_data['y_t']
13
+ empty_x = X.new_zeros((*X.shape[:-1], 0))
14
+ empty_e = E.new_zeros((*E.shape[:-1], 0))
15
+ empty_y = y.new_zeros((y.shape[0], 0))
16
+ return utils.PlaceHolder(X=empty_x, E=empty_e, y=empty_y)
17
+
18
+
19
+ class ExtraFeatures:
20
+ def __init__(self, extra_features_type, dataset_info):
21
+ self.max_n_nodes = dataset_info.max_n_nodes
22
+ self.ncycles = NodeCycleFeatures()
23
+ self.features_type = extra_features_type
24
+ if extra_features_type in ['eigenvalues', 'all']:
25
+ self.eigenfeatures = EigenFeatures(mode=extra_features_type)
26
+
27
+ def __call__(self, noisy_data):
28
+ n = noisy_data['node_mask'].sum(dim=1).unsqueeze(1) / self.max_n_nodes
29
+ x_cycles, y_cycles = self.ncycles(noisy_data) # (bs, n_cycles)
30
+
31
+ if self.features_type == 'cycles':
32
+ E = noisy_data['E_t']
33
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
34
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles)))
35
+
36
+ elif self.features_type == 'eigenvalues':
37
+ eigenfeatures = self.eigenfeatures(noisy_data)
38
+ E = noisy_data['E_t']
39
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
40
+ n_components, batched_eigenvalues = eigenfeatures # (bs, 1), (bs, 10)
41
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles, n_components,
42
+ batched_eigenvalues)))
43
+ elif self.features_type == 'all':
44
+ eigenfeatures = self.eigenfeatures(noisy_data)
45
+ E = noisy_data['E_t']
46
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
47
+ n_components, batched_eigenvalues, nonlcc_indicator, k_lowest_eigvec = eigenfeatures # (bs, 1), (bs, 10),
48
+ # (bs, n, 1), (bs, n, 2)
49
+
50
+ return utils.PlaceHolder(X=torch.cat((x_cycles, nonlcc_indicator, k_lowest_eigvec), dim=-1),
51
+ E=extra_edge_attr,
52
+ y=torch.hstack((n, y_cycles, n_components, batched_eigenvalues)))
53
+ else:
54
+ raise ValueError(f"Features type {self.features_type} not implemented")
55
+
56
+
57
+ class NodeCycleFeatures:
58
+ def __init__(self):
59
+ self.kcycles = KNodeCycles()
60
+
61
+ def __call__(self, noisy_data):
62
+ adj_matrix = noisy_data['E_t'][..., 1:].sum(dim=-1).float()
63
+
64
+ x_cycles, y_cycles = self.kcycles.k_cycles(adj_matrix=adj_matrix) # (bs, n_cycles)
65
+ x_cycles = x_cycles.type_as(adj_matrix) * noisy_data['node_mask'].unsqueeze(-1)
66
+ # Avoid large values when the graph is dense
67
+ x_cycles = x_cycles / 10
68
+ y_cycles = y_cycles / 10
69
+ x_cycles[x_cycles > 1] = 1
70
+ y_cycles[y_cycles > 1] = 1
71
+ return x_cycles, y_cycles
72
+
73
+
74
+ class EigenFeatures:
75
+ """
76
+ Code taken from : https://github.com/Saro00/DGN/blob/master/models/pytorch/eigen_agg.py
77
+ """
78
+ def __init__(self, mode):
79
+ """ mode: 'eigenvalues' or 'all' """
80
+ self.mode = mode
81
+
82
+ def __call__(self, noisy_data):
83
+ E_t = noisy_data['E_t']
84
+ mask = noisy_data['node_mask']
85
+ A = E_t[..., 1:].sum(dim=-1).float() * mask.unsqueeze(1) * mask.unsqueeze(2)
86
+ L = compute_laplacian(A, normalize=False)
87
+ mask_diag = 2 * L.shape[-1] * torch.eye(A.shape[-1]).type_as(L).unsqueeze(0)
88
+ mask_diag = mask_diag * (~mask.unsqueeze(1)) * (~mask.unsqueeze(2))
89
+ L = L * mask.unsqueeze(1) * mask.unsqueeze(2) + mask_diag
90
+
91
+ if self.mode == 'eigenvalues':
92
+ eigvals = torch.linalg.eigvalsh(L) # bs, n
93
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
94
+
95
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
96
+ return n_connected_comp.type_as(A), batch_eigenvalues.type_as(A)
97
+
98
+ elif self.mode == 'all':
99
+ eigvals, eigvectors = torch.linalg.eigh(L)
100
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
101
+ eigvectors = eigvectors * mask.unsqueeze(2) * mask.unsqueeze(1)
102
+ # Retrieve eigenvalues features
103
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
104
+
105
+ # Retrieve eigenvectors features
106
+ nonlcc_indicator, k_lowest_eigenvector = get_eigenvectors_features(vectors=eigvectors,
107
+ node_mask=noisy_data['node_mask'],
108
+ n_connected=n_connected_comp)
109
+ return n_connected_comp, batch_eigenvalues, nonlcc_indicator, k_lowest_eigenvector
110
+ else:
111
+ raise NotImplementedError(f"Mode {self.mode} is not implemented")
112
+
113
+
114
+ def compute_laplacian(adjacency, normalize: bool):
115
+ """
116
+ adjacency : batched adjacency matrix (bs, n, n)
117
+ normalize: can be None, 'sym' or 'rw' for the combinatorial, symmetric normalized or random walk Laplacians
118
+ Return:
119
+ L (n x n ndarray): combinatorial or symmetric normalized Laplacian.
120
+ """
121
+ diag = torch.sum(adjacency, dim=-1) # (bs, n)
122
+ n = diag.shape[-1]
123
+ D = torch.diag_embed(diag) # Degree matrix # (bs, n, n)
124
+ combinatorial = D - adjacency # (bs, n, n)
125
+
126
+ if not normalize:
127
+ return (combinatorial + combinatorial.transpose(1, 2)) / 2
128
+
129
+ diag0 = diag.clone()
130
+ diag[diag == 0] = 1e-12
131
+
132
+ diag_norm = 1 / torch.sqrt(diag) # (bs, n)
133
+ D_norm = torch.diag_embed(diag_norm) # (bs, n, n)
134
+ L = torch.eye(n).unsqueeze(0) - D_norm @ adjacency @ D_norm
135
+ L[diag0 == 0] = 0
136
+ return (L + L.transpose(1, 2)) / 2
137
+
138
+
139
+ def get_eigenvalues_features(eigenvalues, k=5):
140
+ """
141
+ values : eigenvalues -- (bs, n)
142
+ node_mask: (bs, n)
143
+ k: num of non zero eigenvalues to keep
144
+ """
145
+ ev = eigenvalues
146
+ bs, n = ev.shape
147
+ n_connected_components = (ev < 1e-5).sum(dim=-1)
148
+ assert (n_connected_components > 0).all(), (n_connected_components, ev)
149
+
150
+ to_extend = max(n_connected_components) + k - n
151
+ if to_extend > 0:
152
+ eigenvalues = torch.hstack((eigenvalues, 2 * torch.ones(bs, to_extend).type_as(eigenvalues)))
153
+ indices = torch.arange(k).type_as(eigenvalues).long().unsqueeze(0) + n_connected_components.unsqueeze(1)
154
+ first_k_ev = torch.gather(eigenvalues, dim=1, index=indices)
155
+ return n_connected_components.unsqueeze(-1), first_k_ev
156
+
157
+
158
+ def get_eigenvectors_features(vectors, node_mask, n_connected, k=2):
159
+ """
160
+ vectors (bs, n, n) : eigenvectors of Laplacian IN COLUMNS
161
+ returns:
162
+ not_lcc_indicator : indicator vectors of largest connected component (lcc) for each graph -- (bs, n, 1)
163
+ k_lowest_eigvec : k first eigenvectors for the largest connected component -- (bs, n, k)
164
+ """
165
+ bs, n = vectors.size(0), vectors.size(1)
166
+
167
+ # Create an indicator for the nodes outside the largest connected components
168
+ first_ev = torch.round(vectors[:, :, 0], decimals=3) * node_mask # bs, n
169
+ # Add random value to the mask to prevent 0 from becoming the mode
170
+ random = torch.randn(bs, n, device=node_mask.device) * (~node_mask) # bs, n
171
+ first_ev = first_ev + random
172
+ most_common = torch.mode(first_ev, dim=1).values # values: bs -- indices: bs
173
+ mask = ~ (first_ev == most_common.unsqueeze(1))
174
+ not_lcc_indicator = (mask * node_mask).unsqueeze(-1).float()
175
+
176
+ # Get the eigenvectors corresponding to the first nonzero eigenvalues
177
+ to_extend = max(n_connected) + k - n
178
+ if to_extend > 0:
179
+ vectors = torch.cat((vectors, torch.zeros(bs, n, to_extend).type_as(vectors)), dim=2) # bs, n , n + to_extend
180
+ indices = torch.arange(k).type_as(vectors).long().unsqueeze(0).unsqueeze(0) + n_connected.unsqueeze(2) # bs, 1, k
181
+ indices = indices.expand(-1, n, -1) # bs, n, k
182
+ first_k_ev = torch.gather(vectors, dim=2, index=indices) # bs, n, k
183
+ first_k_ev = first_k_ev * node_mask.unsqueeze(2)
184
+
185
+ return not_lcc_indicator, first_k_ev
186
+
187
+ def batch_trace(X):
188
+ """
189
+ Expect a matrix of shape B N N, returns the trace in shape B
190
+ :param X:
191
+ :return:
192
+ """
193
+ diag = torch.diagonal(X, dim1=-2, dim2=-1)
194
+ trace = diag.sum(dim=-1)
195
+ return trace
196
+
197
+
198
+ def batch_diagonal(X):
199
+ """
200
+ Extracts the diagonal from the last two dims of a tensor
201
+ :param X:
202
+ :return:
203
+ """
204
+ return torch.diagonal(X, dim1=-2, dim2=-1)
205
+
206
+
207
+ class KNodeCycles:
208
+ """ Builds cycle counts for each node in a graph.
209
+ """
210
+
211
+ def __init__(self):
212
+ super().__init__()
213
+
214
+ def calculate_kpowers(self):
215
+ self.k1_matrix = self.adj_matrix.float()
216
+ self.d = self.adj_matrix.sum(dim=-1)
217
+ self.k2_matrix = self.k1_matrix @ self.adj_matrix.float()
218
+ self.k3_matrix = self.k2_matrix @ self.adj_matrix.float()
219
+ self.k4_matrix = self.k3_matrix @ self.adj_matrix.float()
220
+ self.k5_matrix = self.k4_matrix @ self.adj_matrix.float()
221
+ self.k6_matrix = self.k5_matrix @ self.adj_matrix.float()
222
+
223
+ def k3_cycle(self):
224
+ """ tr(A ** 3). """
225
+ c3 = batch_diagonal(self.k3_matrix)
226
+ return (c3 / 2).unsqueeze(-1).float(), (torch.sum(c3, dim=-1) / 6).unsqueeze(-1).float()
227
+
228
+ def k4_cycle(self):
229
+ diag_a4 = batch_diagonal(self.k4_matrix)
230
+ c4 = diag_a4 - self.d * (self.d - 1) - (self.adj_matrix @ self.d.unsqueeze(-1)).sum(dim=-1)
231
+ return (c4 / 2).unsqueeze(-1).float(), (torch.sum(c4, dim=-1) / 8).unsqueeze(-1).float()
232
+
233
+ def k5_cycle(self):
234
+ diag_a5 = batch_diagonal(self.k5_matrix)
235
+ triangles = batch_diagonal(self.k3_matrix)
236
+ c5 = diag_a5 - 2 * triangles * self.d - (self.adj_matrix @ triangles.unsqueeze(-1)).sum(dim=-1) + triangles
237
+ return (c5 / 2).unsqueeze(-1).float(), (c5.sum(dim=-1) / 10).unsqueeze(-1).float()
238
+
239
+ def k6_cycle(self):
240
+ term_1_t = batch_trace(self.k6_matrix)
241
+ term_2_t = batch_trace(self.k3_matrix ** 2)
242
+ term3_t = torch.sum(self.adj_matrix * self.k2_matrix.pow(2), dim=[-2, -1])
243
+ d_t4 = batch_diagonal(self.k2_matrix)
244
+ a_4_t = batch_diagonal(self.k4_matrix)
245
+ term_4_t = (d_t4 * a_4_t).sum(dim=-1)
246
+ term_5_t = batch_trace(self.k4_matrix)
247
+ term_6_t = batch_trace(self.k3_matrix)
248
+ term_7_t = batch_diagonal(self.k2_matrix).pow(3).sum(-1)
249
+ term8_t = torch.sum(self.k3_matrix, dim=[-2, -1])
250
+ term9_t = batch_diagonal(self.k2_matrix).pow(2).sum(-1)
251
+ term10_t = batch_trace(self.k2_matrix)
252
+
253
+ c6_t = (term_1_t - 3 * term_2_t + 9 * term3_t - 6 * term_4_t + 6 * term_5_t - 4 * term_6_t + 4 * term_7_t +
254
+ 3 * term8_t - 12 * term9_t + 4 * term10_t)
255
+ return None, (c6_t / 12).unsqueeze(-1).float()
256
+
257
+ def k_cycles(self, adj_matrix, verbose=False):
258
+ self.adj_matrix = adj_matrix
259
+ self.calculate_kpowers()
260
+
261
+ k3x, k3y = self.k3_cycle()
262
+ assert (k3x >= -0.1).all()
263
+
264
+ k4x, k4y = self.k4_cycle()
265
+ assert (k4x >= -0.1).all()
266
+
267
+ k5x, k5y = self.k5_cycle()
268
+ assert (k5x >= -0.1).all(), k5x
269
+
270
+ _, k6y = self.k6_cycle()
271
+ assert (k6y >= -0.1).all()
272
+
273
+ kcyclesx = torch.cat([k3x, k4x, k5x], dim=-1)
274
+ kcyclesy = torch.cat([k3y, k4y, k5y, k6y], dim=-1)
275
+ return kcyclesx, kcyclesy
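A small sanity check for the cycle counter (not part of the commit). It imports KNodeCycles from the identical copy in the top-level extra_features.py, which avoids this module's `from src import utils` dependency.

import torch
from extra_features import KNodeCycles

# A single triangle: each node lies on one 3-cycle and the graph has exactly one.
adj = torch.tensor([[[0., 1., 1.],
                     [1., 0., 1.],
                     [1., 1., 0.]]])
x_cycles, y_cycles = KNodeCycles().k_cycles(adj)
print(x_cycles[0, :, 0])   # per-node 3-cycle counts: tensor([1., 1., 1.])
print(y_cycles[0])         # graph-level 3/4/5/6-cycle counts: tensor([1., 0., 0., 0.])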
diffusion/extra_features_molecular.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+ from src import utils
+
+
+ class ExtraMolecularFeatures:
+     def __init__(self, dataset_infos):
+         self.charge = ChargeFeature(remove_h=dataset_infos.remove_h, valencies=dataset_infos.valencies)
+         self.valency = ValencyFeature()
+         self.weight = WeightFeature(max_weight=dataset_infos.max_weight, atom_weights=dataset_infos.atom_weights)
+
+     def __call__(self, noisy_data):
+         charge = self.charge(noisy_data).unsqueeze(-1)      # (bs, n, 1)
+         valency = self.valency(noisy_data).unsqueeze(-1)    # (bs, n, 1)
+         weight = self.weight(noisy_data)                    # (bs, 1)
+
+         extra_edge_attr = torch.zeros((*noisy_data['E_t'].shape[:-1], 0)).type_as(noisy_data['E_t'])
+
+         return utils.PlaceHolder(X=torch.cat((charge, valency), dim=-1), E=extra_edge_attr, y=weight)
+
+
+ class ChargeFeature:
+     def __init__(self, remove_h, valencies):
+         self.remove_h = remove_h
+         self.valencies = valencies
+
+     def __call__(self, noisy_data):
+         bond_orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1)
+         weighted_E = noisy_data['E_t'] * bond_orders                 # (bs, n, n, de)
+         current_valencies = weighted_E.argmax(dim=-1).sum(dim=-1)    # (bs, n)
+
+         valencies = torch.tensor(self.valencies, device=noisy_data['X_t'].device).reshape(1, 1, -1)
+         X = noisy_data['X_t'] * valencies                            # (bs, n, dx)
+         normal_valencies = torch.argmax(X, dim=-1)                   # (bs, n)
+
+         return (normal_valencies - current_valencies).type_as(noisy_data['X_t'])
+
+
+ class ValencyFeature:
+     def __init__(self):
+         pass
+
+     def __call__(self, noisy_data):
+         orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1)
+         E = noisy_data['E_t'] * orders                   # (bs, n, n, de)
+         valencies = E.argmax(dim=-1).sum(dim=-1)         # (bs, n)
+         return valencies.type_as(noisy_data['X_t'])
+
+
+ class WeightFeature:
+     def __init__(self, max_weight, atom_weights):
+         self.max_weight = max_weight
+         self.atom_weight_list = torch.tensor(list(atom_weights.values()))
+
+     def __call__(self, noisy_data):
+         X = torch.argmax(noisy_data['X_t'], dim=-1)      # (bs, n)
+         X_weights = self.atom_weight_list.to(X.device)[X]            # (bs, n)
+         return X_weights.sum(dim=-1).unsqueeze(-1).type_as(noisy_data['X_t']) / self.max_weight     # (bs, 1)
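A usage sketch (not part of the commit). The dataset constants below are invented, and the module's `from src import utils` import is assumed to resolve on the Python path.

import torch
from diffusion.extra_features_molecular import ExtraMolecularFeatures

class MolInfo:                         # hypothetical stand-in for the real dataset_infos object
    remove_h = True
    valencies = [4, 3, 2, 1]           # e.g. C, N, O, F
    atom_weights = {0: 12, 1: 14, 2: 16, 3: 19}
    max_weight = 150

bs, n = 2, 5
X_t = torch.nn.functional.one_hot(torch.zeros(bs, n, dtype=torch.long), 4).float()     # all carbons
E_t = torch.nn.functional.one_hot(torch.zeros(bs, n, n, dtype=torch.long), 5).float()  # no bonds
noisy_data = {'X_t': X_t, 'E_t': E_t}

extra = ExtraMolecularFeatures(MolInfo())(noisy_data)   # PlaceHolder with X: (bs, n, 2), y: (bs, 1)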
diffusion/layers.py ADDED
@@ -0,0 +1,19 @@
+ import math
+ import torch
+
+
+ class SinusoidalPosEmb(torch.nn.Module):
+     def __init__(self, dim):
+         super().__init__()
+         self.dim = dim
+
+     def forward(self, x):
+         x = x.squeeze() * 1000
+         assert len(x.shape) == 1
+         half_dim = self.dim // 2
+         emb = math.log(10000) / (half_dim - 1)
+         emb = torch.exp(torch.arange(half_dim) * -emb)
+         emb = emb.type_as(x)
+         emb = x[:, None] * emb[None, :]
+         emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
+         return emb
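A minimal sketch of the timestep embedding above (not part of the commit):

import torch
from diffusion.layers import SinusoidalPosEmb

emb = SinusoidalPosEmb(dim=128)
t = torch.rand(16, 1)          # normalized timesteps in [0, 1], one per graph
t_emb = emb(t)                 # (16, 128): sinusoidal embedding of t * 1000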
diffusion/noise_schedule.py ADDED
@@ -0,0 +1,225 @@
1
+ import numpy as np
2
+ import torch
3
+ import utils
4
+ from diffusion import diffusion_utils
5
+
6
+
7
+ class PredefinedNoiseSchedule(torch.nn.Module):
8
+ """
9
+ Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules.
10
+ """
11
+
12
+ def __init__(self, noise_schedule, timesteps):
13
+ super(PredefinedNoiseSchedule, self).__init__()
14
+ self.timesteps = timesteps
15
+
16
+ if noise_schedule == 'cosine':
17
+ alphas2 = diffusion_utils.cosine_beta_schedule(timesteps)
18
+ elif noise_schedule == 'custom':
19
+ raise NotImplementedError()
20
+ else:
21
+ raise ValueError(noise_schedule)
22
+
23
+ # print('alphas2', alphas2)
24
+
25
+ sigmas2 = 1 - alphas2
26
+
27
+ log_alphas2 = np.log(alphas2)
28
+ log_sigmas2 = np.log(sigmas2)
29
+
30
+ log_alphas2_to_sigmas2 = log_alphas2 - log_sigmas2 # (timesteps + 1, )
31
+
32
+ # print('gamma', -log_alphas2_to_sigmas2)
33
+
34
+ self.gamma = torch.nn.Parameter(
35
+ torch.from_numpy(-log_alphas2_to_sigmas2).float(),
36
+ requires_grad=False)
37
+
38
+ def forward(self, t):
39
+ t_int = torch.round(t * self.timesteps).long()
40
+ return self.gamma[t_int]
41
+
42
+
43
+
44
+ class PredefinedNoiseScheduleDiscrete(torch.nn.Module):
45
+ """
46
+ Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules.
47
+ """
48
+
49
+ def __init__(self, noise_schedule, timesteps):
50
+ super(PredefinedNoiseScheduleDiscrete, self).__init__()
51
+ self.timesteps = timesteps
52
+
53
+ if noise_schedule == 'cosine':
54
+ betas = diffusion_utils.cosine_beta_schedule_discrete(timesteps)
55
+ elif noise_schedule == 'custom':
56
+ betas = diffusion_utils.custom_beta_schedule_discrete(timesteps)
57
+ else:
58
+ raise NotImplementedError(noise_schedule)
59
+
60
+ self.register_buffer('betas', torch.from_numpy(betas).float())
61
+
62
+ self.alphas = 1 - torch.clamp(self.betas, min=0, max=0.9999)
63
+
64
+ log_alpha = torch.log(self.alphas)
65
+ log_alpha_bar = torch.cumsum(log_alpha, dim=0)
66
+ self.alphas_bar = torch.exp(log_alpha_bar)
67
+ # print(f"[Noise schedule: {noise_schedule}] alpha_bar:", self.alphas_bar)
68
+
69
+ def forward(self, t_normalized=None, t_int=None):
70
+ assert int(t_normalized is None) + int(t_int is None) == 1
71
+ if t_int is None:
72
+ t_int = torch.round(t_normalized * self.timesteps)
73
+
74
+ return self.betas[t_int.long()]
75
+
76
+ def get_alpha_bar(self, t_normalized=None, t_int=None):
77
+ assert int(t_normalized is None) + int(t_int is None) == 1
78
+ if t_int is None:
79
+ t_int = torch.round(t_normalized * self.timesteps)
80
+ return self.alphas_bar.to(t_int.device)[t_int.long()]
81
+
82
+
83
+ class DiscreteUniformTransition:
84
+ def __init__(self, x_classes: int, e_classes: int, y_classes: int):
85
+ self.X_classes = x_classes
86
+ self.E_classes = e_classes
87
+ self.y_classes = y_classes
88
+ self.u_x = torch.ones(1, self.X_classes, self.X_classes)
89
+ if self.X_classes > 0:
90
+ self.u_x = self.u_x / self.X_classes
91
+
92
+ self.u_e = torch.ones(1, self.E_classes, self.E_classes)
93
+ if self.E_classes > 0:
94
+ self.u_e = self.u_e / self.E_classes
95
+
96
+ self.u_y = torch.ones(1, self.y_classes, self.y_classes)
97
+ if self.y_classes > 0:
98
+ self.u_y = self.u_y / self.y_classes
99
+
100
+ def get_Qt(self, beta_t, device):
101
+ """ Returns one-step transition matrices for X and E, from step t - 1 to step t.
102
+ Qt = (1 - beta_t) * I + beta_t / K
103
+
104
+ beta_t: (bs) noise level between 0 and 1
105
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
106
+ """
107
+ beta_t = beta_t.unsqueeze(1)
108
+ beta_t = beta_t.to(device)
109
+ self.u_x = self.u_x.to(device)
110
+ self.u_e = self.u_e.to(device)
111
+ self.u_y = self.u_y.to(device)
112
+
113
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0)
114
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0)
115
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0)
116
+
117
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
118
+
119
+ def get_Qt_bar(self, alpha_bar_t, device):
120
+ """ Returns t-step transition matrices for X and E, from step 0 to step t.
121
+ Qt = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) / K
122
+
123
+ alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t.
124
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
125
+ """
126
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
127
+
128
+ alpha_bar_t = alpha_bar_t.to(device)
129
+ self.u_x = self.u_x.to(device)
130
+ self.u_e = self.u_e.to(device)
131
+ self.u_y = self.u_y.to(device)
132
+
133
+ q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
134
+ q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
135
+ q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
136
+
137
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
138
+
139
+
140
+ class MarginalUniformTransition:
141
+ def __init__(self, x_marginals, e_marginals, y_classes):
142
+ self.X_classes = len(x_marginals)
143
+ self.E_classes = len(e_marginals)
144
+ self.y_classes = y_classes
145
+ self.x_marginals = x_marginals
146
+ self.e_marginals = e_marginals
147
+
148
+ self.u_x = x_marginals.unsqueeze(0).expand(self.X_classes, -1).unsqueeze(0)
149
+ self.u_e = e_marginals.unsqueeze(0).expand(self.E_classes, -1).unsqueeze(0)
150
+ self.u_y = torch.ones(1, self.y_classes, self.y_classes)
151
+ if self.y_classes > 0:
152
+ self.u_y = self.u_y / self.y_classes
153
+
154
+ def get_Qt(self, beta_t, device):
155
+ """ Returns one-step transition matrices for X and E, from step t - 1 to step t.
156
+ Qt = (1 - beta_t) * I + beta_t * M, where M has the marginal distribution in every row
157
+
158
+ beta_t: (bs) noise level between 0 and 1
159
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). """
160
+ beta_t = beta_t.unsqueeze(1)
161
+ beta_t = beta_t.to(device)
162
+ self.u_x = self.u_x.to(device)
163
+ self.u_e = self.u_e.to(device)
164
+ self.u_y = self.u_y.to(device)
165
+
166
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0)
167
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0)
168
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0)
169
+
170
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
171
+
172
+ def get_Qt_bar(self, alpha_bar_t, device):
173
+ """ Returns t-step transition matrices for X and E, from step 0 to step t.
174
+ Qt_bar = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) * M, where M has the marginal distribution in every row
175
+
176
+ alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t.
177
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
178
+ """
179
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
180
+ alpha_bar_t = alpha_bar_t.to(device)
181
+ self.u_x = self.u_x.to(device)
182
+ self.u_e = self.u_e.to(device)
183
+ self.u_y = self.u_y.to(device)
184
+
185
+ q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
186
+ q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
187
+ q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
188
+
189
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
190
+
191
+
192
+ class AbsorbingStateTransition:
193
+ def __init__(self, abs_state: int, x_classes: int, e_classes: int, y_classes: int):
194
+ self.X_classes = x_classes
195
+ self.E_classes = e_classes
196
+ self.y_classes = y_classes
197
+
198
+ self.u_x = torch.zeros(1, self.X_classes, self.X_classes)
199
+ self.u_x[:, :, abs_state] = 1
200
+
201
+ self.u_e = torch.zeros(1, self.E_classes, self.E_classes)
202
+ self.u_e[:, :, abs_state] = 1
203
+
204
+ self.u_y = torch.zeros(1, self.y_classes, self.y_classes)
205
+ self.u_y[:, :, abs_state] = 1
206
+
207
+ def get_Qt(self, beta_t):
208
+ """ Returns one-step transition matrices for X, E and y. """
209
+ beta_t = beta_t.unsqueeze(1)
210
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes).unsqueeze(0)
211
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes).unsqueeze(0)
212
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes).unsqueeze(0)
213
+ return q_x, q_e, q_y
214
+
215
+ def get_Qt_bar(self, alpha_bar_t):
216
+ """ beta_t: (bs)
217
+ Returns transition matrices for X and E"""
218
+
219
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
220
+
221
+ q_x = alpha_bar_t * torch.eye(self.X_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
222
+ q_e = alpha_bar_t * torch.eye(self.E_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
223
+ q_y = alpha_bar_t * torch.eye(self.y_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
224
+
225
+ return q_x, q_e, q_y
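A usage sketch for the discrete schedule and the marginal transition model (not part of the commit); the marginal vectors are made up, and the script is assumed to run from the repository root so that `utils` and `diffusion.diffusion_utils` import correctly.

import torch
from diffusion.noise_schedule import PredefinedNoiseScheduleDiscrete, MarginalUniformTransition

schedule = PredefinedNoiseScheduleDiscrete('cosine', timesteps=500)
t_int = torch.randint(0, 500, (8, 1))               # one timestep per graph in the batch
alpha_bar = schedule.get_alpha_bar(t_int=t_int)     # (8, 1) cumulative signal level

x_marginals = torch.tensor([0.7, 0.2, 0.1])         # made-up node-class marginals
e_marginals = torch.tensor([0.9, 0.05, 0.03, 0.02]) # made-up edge-class marginals
transition = MarginalUniformTransition(x_marginals, e_marginals, y_classes=1)
Qt_bar = transition.get_Qt_bar(alpha_bar, device='cpu')   # PlaceHolder: Qt_bar.X is (8, 3, 3)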
diffusion/utils.py ADDED
@@ -0,0 +1,137 @@
1
+ import os
2
+ import torch_geometric.utils
3
+ from omegaconf import OmegaConf, open_dict
4
+ from torch_geometric.utils import to_dense_adj, to_dense_batch
5
+ import torch
6
+ import omegaconf
7
+ import wandb
8
+
9
+ def create_folders(args):
10
+ try:
11
+ # os.makedirs('checkpoints')
12
+ os.makedirs('graphs')
13
+ os.makedirs('chains')
14
+ except OSError:
15
+ pass
16
+
17
+ try:
18
+ # os.makedirs('checkpoints/' + args.general.name)
19
+ os.makedirs('graphs/' + args.general.name)
20
+ os.makedirs('chains/' + args.general.name)
21
+ except OSError:
22
+ pass
23
+
24
+
25
+ def normalize(X, E, y, norm_values, norm_biases, node_mask):
26
+ X = (X - norm_biases[0]) / norm_values[0]
27
+ E = (E - norm_biases[1]) / norm_values[1]
28
+ y = (y - norm_biases[2]) / norm_values[2]
29
+
30
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
31
+ E[diag] = 0
32
+
33
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask)
34
+
35
+
36
+ def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False):
37
+ """
38
+ X : node features
39
+ E : edge features
40
+ y : global features
41
+ norm_values : [norm value X, norm value E, norm value y]
42
+ norm_biases : same order
43
+ node_mask
44
+ """
45
+ X = (X * norm_values[0] + norm_biases[0])
46
+ E = (E * norm_values[1] + norm_biases[1])
47
+ y = y * norm_values[2] + norm_biases[2]
48
+
49
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse)
50
+
51
+
52
+ def to_dense(x, edge_index, edge_attr, batch):
53
+ X, node_mask = to_dense_batch(x=x, batch=batch)
54
+ # node_mask = node_mask.float()
55
+ edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr)
56
+ # TODO: carefully check if setting node_mask as a bool breaks the continuous case
57
+ max_num_nodes = X.size(1)
58
+ E = to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes)
59
+ E = encode_no_edge(E)
60
+
61
+ return PlaceHolder(X=X, E=E, y=None), node_mask
62
+
63
+
64
+ def encode_no_edge(E):
65
+ assert len(E.shape) == 4
66
+ if E.shape[-1] == 0:
67
+ return E
68
+ no_edge = torch.sum(E, dim=3) == 0
69
+ first_elt = E[:, :, :, 0]
70
+ first_elt[no_edge] = 1
71
+ E[:, :, :, 0] = first_elt
72
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
73
+ E[diag] = 0
74
+ return E
75
+
76
+
77
+ def update_config_with_new_keys(cfg, saved_cfg):
78
+ saved_general = saved_cfg.general
79
+ saved_train = saved_cfg.train
80
+ saved_model = saved_cfg.model
81
+
82
+ for key, val in saved_general.items():
83
+ OmegaConf.set_struct(cfg.general, True)
84
+ with open_dict(cfg.general):
85
+ if key not in cfg.general.keys():
86
+ setattr(cfg.general, key, val)
87
+
88
+ OmegaConf.set_struct(cfg.train, True)
89
+ with open_dict(cfg.train):
90
+ for key, val in saved_train.items():
91
+ if key not in cfg.train.keys():
92
+ setattr(cfg.train, key, val)
93
+
94
+ OmegaConf.set_struct(cfg.model, True)
95
+ with open_dict(cfg.model):
96
+ for key, val in saved_model.items():
97
+ if key not in cfg.model.keys():
98
+ setattr(cfg.model, key, val)
99
+ return cfg
100
+
101
+
102
+ class PlaceHolder:
103
+ def __init__(self, X, E, y):
104
+ self.X = X
105
+ self.E = E
106
+ self.y = y
107
+
108
+ def type_as(self, x: torch.Tensor):
109
+ """ Changes the device and dtype of X, E, y. """
110
+ self.X = self.X.type_as(x)
111
+ self.E = self.E.type_as(x)
112
+ self.y = self.y.type_as(x)
113
+ return self
114
+
115
+ def mask(self, node_mask, collapse=False):
116
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
117
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
118
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
119
+
120
+ if collapse:
121
+ self.X = torch.argmax(self.X, dim=-1)
122
+ self.E = torch.argmax(self.E, dim=-1)
123
+
124
+ self.X[node_mask == 0] = - 1
125
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
126
+ else:
127
+ self.X = self.X * x_mask
128
+ self.E = self.E * e_mask1 * e_mask2
129
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
130
+ return self
131
+
132
+ def setup_wandb(cfg):
133
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
134
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
135
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
136
+ wandb.init(**kwargs)
137
+ wandb.save('*.txt')
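A sketch of the sparse-to-dense conversion (not part of the commit); the two toy graphs and their one-hot features are invented for illustration.

import torch
from torch_geometric.data import Data, Batch
from diffusion.utils import to_dense

g1 = Data(x=torch.eye(3), edge_index=torch.tensor([[0, 1], [1, 0]]),
          edge_attr=torch.tensor([[0., 1.], [0., 1.]]))        # 3 nodes, one undirected edge
g2 = Data(x=torch.eye(3)[:2], edge_index=torch.tensor([[0, 1], [1, 0]]),
          edge_attr=torch.tensor([[0., 1.], [0., 1.]]))        # 2 nodes, one undirected edge
batch = Batch.from_data_list([g1, g2])

dense, node_mask = to_dense(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
print(dense.X.shape, dense.E.shape, node_mask.shape)   # (2, 3, 3), (2, 3, 3, 2), (2, 3)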
distributions.py ADDED
@@ -0,0 +1,37 @@
+ import torch
+
+
+ class DistributionNodes:
+     def __init__(self, histogram):
+         """ Compute the distribution of the number of nodes in the dataset, and sample from this distribution.
+             histogram: dict. The keys are num_nodes, the values are counts
+         """
+
+         if type(histogram) == dict:
+             max_n_nodes = max(histogram.keys())
+             prob = torch.zeros(max_n_nodes + 1)
+             for num_nodes, count in histogram.items():
+                 prob[num_nodes] = count
+         else:
+             prob = histogram
+
+         self.prob = prob / prob.sum()
+         self.m = torch.distributions.Categorical(prob)
+
+     def sample_n(self, n_samples, device):
+         idx = self.m.sample((n_samples,))
+         return idx.to(device)
+
+     def log_prob(self, batch_n_nodes):
+         assert len(batch_n_nodes.size()) == 1
+         p = self.prob.to(batch_n_nodes.device)
+
+         mask = batch_n_nodes >= p.shape[0]
+         batch_n_nodes[mask] = p.shape[0] - 1
+
+         probas = p[batch_n_nodes]
+
+         probas[mask] = 0
+         log_p = torch.log(probas + 1e-30)
+
+         return log_p
extra_features.py ADDED
@@ -0,0 +1,275 @@
1
+ import torch
2
+ import utils
3
+
4
+
5
+ class DummyExtraFeatures:
6
+ def __init__(self):
7
+ """ This class does not compute anything, just returns empty tensors."""
8
+
9
+ def __call__(self, noisy_data):
10
+ X = noisy_data['X_t']
11
+ E = noisy_data['E_t']
12
+ y = noisy_data['y_t']
13
+ empty_x = X.new_zeros((*X.shape[:-1], 0))
14
+ empty_e = E.new_zeros((*E.shape[:-1], 0))
15
+ empty_y = y.new_zeros((y.shape[0], 0))
16
+ return utils.PlaceHolder(X=empty_x, E=empty_e, y=empty_y)
17
+
18
+
19
+ class ExtraFeatures:
20
+ def __init__(self, extra_features_type, max_n_nodes):
21
+ self.max_n_nodes = max_n_nodes
22
+ self.ncycles = NodeCycleFeatures()
23
+ self.features_type = extra_features_type
24
+ if extra_features_type in ['eigenvalues', 'all']:
25
+ self.eigenfeatures = EigenFeatures(mode=extra_features_type)
26
+
27
+ def __call__(self, noisy_data):
28
+ n = noisy_data['node_mask'].sum(dim=1).unsqueeze(1) / self.max_n_nodes
29
+ x_cycles, y_cycles = self.ncycles(noisy_data) # (bs, n_cycles)
30
+
31
+ if self.features_type == 'cycles':
32
+ E = noisy_data['E_t']
33
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
34
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles)))
35
+
36
+ elif self.features_type == 'eigenvalues':
37
+ eigenfeatures = self.eigenfeatures(noisy_data)
38
+ E = noisy_data['E_t']
39
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
40
+ n_components, batched_eigenvalues = eigenfeatures # (bs, 1), (bs, 10)
41
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles, n_components,
42
+ batched_eigenvalues)))
43
+ elif self.features_type == 'all':
44
+ eigenfeatures = self.eigenfeatures(noisy_data)
45
+ E = noisy_data['E_t']
46
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
47
+ n_components, batched_eigenvalues, nonlcc_indicator, k_lowest_eigvec = eigenfeatures # (bs, 1), (bs, 10),
48
+ # (bs, n, 1), (bs, n, 2)
49
+
50
+ return utils.PlaceHolder(X=torch.cat((x_cycles, nonlcc_indicator, k_lowest_eigvec), dim=-1),
51
+ E=extra_edge_attr,
52
+ y=torch.hstack((n, y_cycles, n_components, batched_eigenvalues)))
53
+ else:
54
+ raise ValueError(f"Features type {self.features_type} not implemented")
55
+
56
+
57
+ class NodeCycleFeatures:
58
+ def __init__(self):
59
+ self.kcycles = KNodeCycles()
60
+
61
+ def __call__(self, noisy_data):
62
+ adj_matrix = noisy_data['E_t'][..., 1:].sum(dim=-1).float()
63
+
64
+ x_cycles, y_cycles = self.kcycles.k_cycles(adj_matrix=adj_matrix) # (bs, n_cycles)
65
+ x_cycles = x_cycles.type_as(adj_matrix) * noisy_data['node_mask'].unsqueeze(-1)
66
+ # Avoid large values when the graph is dense
67
+ x_cycles = x_cycles / 10
68
+ y_cycles = y_cycles / 10
69
+ x_cycles[x_cycles > 1] = 1
70
+ y_cycles[y_cycles > 1] = 1
71
+ return x_cycles, y_cycles
72
+
73
+
74
+ class EigenFeatures:
75
+ """
76
+ Code taken from : https://github.com/Saro00/DGN/blob/master/models/pytorch/eigen_agg.py
77
+ """
78
+ def __init__(self, mode):
79
+ """ mode: 'eigenvalues' or 'all' """
80
+ self.mode = mode
81
+
82
+ def __call__(self, noisy_data):
83
+ E_t = noisy_data['E_t']
84
+ mask = noisy_data['node_mask']
85
+ A = E_t[..., 1:].sum(dim=-1).float() * mask.unsqueeze(1) * mask.unsqueeze(2)
86
+ L = compute_laplacian(A, normalize=False)
87
+ mask_diag = 2 * L.shape[-1] * torch.eye(A.shape[-1]).type_as(L).unsqueeze(0)
88
+ mask_diag = mask_diag * (~mask.unsqueeze(1)) * (~mask.unsqueeze(2))
89
+ L = L * mask.unsqueeze(1) * mask.unsqueeze(2) + mask_diag
90
+
91
+ if self.mode == 'eigenvalues':
92
+ eigvals = torch.linalg.eigvalsh(L) # bs, n
93
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
94
+
95
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
96
+ return n_connected_comp.type_as(A), batch_eigenvalues.type_as(A)
97
+
98
+ elif self.mode == 'all':
99
+ eigvals, eigvectors = torch.linalg.eigh(L)
100
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
101
+ eigvectors = eigvectors * mask.unsqueeze(2) * mask.unsqueeze(1)
102
+ # Retrieve eigenvalues features
103
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
104
+
105
+ # Retrieve eigenvectors features
106
+ nonlcc_indicator, k_lowest_eigenvector = get_eigenvectors_features(vectors=eigvectors,
107
+ node_mask=noisy_data['node_mask'],
108
+ n_connected=n_connected_comp)
109
+ return n_connected_comp, batch_eigenvalues, nonlcc_indicator, k_lowest_eigenvector
110
+ else:
111
+ raise NotImplementedError(f"Mode {self.mode} is not implemented")
112
+
113
+
114
+ def compute_laplacian(adjacency, normalize: bool):
115
+ """
116
+ adjacency : batched adjacency matrix (bs, n, n)
117
+ normalize: can be None, 'sym' or 'rw' for the combinatorial, symmetric normalized or random walk Laplacians
118
+ Return:
119
+ L (n x n ndarray): combinatorial or symmetric normalized Laplacian.
120
+ """
121
+ diag = torch.sum(adjacency, dim=-1) # (bs, n)
122
+ n = diag.shape[-1]
123
+ D = torch.diag_embed(diag) # Degree matrix # (bs, n, n)
124
+ combinatorial = D - adjacency # (bs, n, n)
125
+
126
+ if not normalize:
127
+ return (combinatorial + combinatorial.transpose(1, 2)) / 2
128
+
129
+ diag0 = diag.clone()
130
+ diag[diag == 0] = 1e-12
131
+
132
+ diag_norm = 1 / torch.sqrt(diag) # (bs, n)
133
+ D_norm = torch.diag_embed(diag_norm) # (bs, n, n)
134
+ L = torch.eye(n).unsqueeze(0) - D_norm @ adjacency @ D_norm
135
+ L[diag0 == 0] = 0
136
+ return (L + L.transpose(1, 2)) / 2
137
+
138
+
139
+ def get_eigenvalues_features(eigenvalues, k=5):
140
+ """
141
+ values : eigenvalues -- (bs, n)
142
+ node_mask: (bs, n)
143
+ k: num of non zero eigenvalues to keep
144
+ """
145
+ ev = eigenvalues
146
+ bs, n = ev.shape
147
+ n_connected_components = (ev < 1e-5).sum(dim=-1)
148
+ # assert (n_connected_components > 0).all(), (n_connected_components, ev)
149
+
150
+ to_extend = max(n_connected_components) + k - n
151
+ if to_extend > 0:
152
+ eigenvalues = torch.hstack((eigenvalues, 2 * torch.ones(bs, to_extend).type_as(eigenvalues)))
153
+ indices = torch.arange(k).type_as(eigenvalues).long().unsqueeze(0) + n_connected_components.unsqueeze(1)
154
+ first_k_ev = torch.gather(eigenvalues, dim=1, index=indices)
155
+ return n_connected_components.unsqueeze(-1), first_k_ev
156
+
157
+
158
+ def get_eigenvectors_features(vectors, node_mask, n_connected, k=2):
159
+ """
160
+ vectors (bs, n, n) : eigenvectors of Laplacian IN COLUMNS
161
+ returns:
162
+ not_lcc_indicator : indicator vectors of largest connected component (lcc) for each graph -- (bs, n, 1)
163
+ k_lowest_eigvec : k first eigenvectors for the largest connected component -- (bs, n, k)
164
+ """
165
+ bs, n = vectors.size(0), vectors.size(1)
166
+
167
+ # Create an indicator for the nodes outside the largest connected components
168
+ first_ev = torch.round(vectors[:, :, 0], decimals=3) * node_mask # bs, n
169
+ # Add random value to the mask to prevent 0 from becoming the mode
170
+ random = torch.randn(bs, n, device=node_mask.device) * (~node_mask) # bs, n
171
+ first_ev = first_ev + random
172
+ most_common = torch.mode(first_ev, dim=1).values # values: bs -- indices: bs
173
+ mask = ~ (first_ev == most_common.unsqueeze(1))
174
+ not_lcc_indicator = (mask * node_mask).unsqueeze(-1).float()
175
+
176
+ # Get the eigenvectors corresponding to the first nonzero eigenvalues
177
+ to_extend = max(n_connected) + k - n
178
+ if to_extend > 0:
179
+ vectors = torch.cat((vectors, torch.zeros(bs, n, to_extend).type_as(vectors)), dim=2) # bs, n , n + to_extend
180
+ indices = torch.arange(k).type_as(vectors).long().unsqueeze(0).unsqueeze(0) + n_connected.unsqueeze(2) # bs, 1, k
181
+ indices = indices.expand(-1, n, -1) # bs, n, k
182
+ first_k_ev = torch.gather(vectors, dim=2, index=indices) # bs, n, k
183
+ first_k_ev = first_k_ev * node_mask.unsqueeze(2)
184
+
185
+ return not_lcc_indicator, first_k_ev
186
+
187
+ def batch_trace(X):
188
+ """
189
+ Expect a matrix of shape B N N, returns the trace in shape B
190
+ :param X:
191
+ :return:
192
+ """
193
+ diag = torch.diagonal(X, dim1=-2, dim2=-1)
194
+ trace = diag.sum(dim=-1)
195
+ return trace
196
+
197
+
198
+ def batch_diagonal(X):
199
+ """
200
+ Extracts the diagonal from the last two dims of a tensor
201
+ :param X:
202
+ :return:
203
+ """
204
+ return torch.diagonal(X, dim1=-2, dim2=-1)
205
+
206
+
207
+ class KNodeCycles:
208
+ """ Builds cycle counts for each node in a graph.
209
+ """
210
+
211
+ def __init__(self):
212
+ super().__init__()
213
+
214
+ def calculate_kpowers(self):
215
+ self.k1_matrix = self.adj_matrix.float()
216
+ self.d = self.adj_matrix.sum(dim=-1)
217
+ self.k2_matrix = self.k1_matrix @ self.adj_matrix.float()
218
+ self.k3_matrix = self.k2_matrix @ self.adj_matrix.float()
219
+ self.k4_matrix = self.k3_matrix @ self.adj_matrix.float()
220
+ self.k5_matrix = self.k4_matrix @ self.adj_matrix.float()
221
+ self.k6_matrix = self.k5_matrix @ self.adj_matrix.float()
222
+
223
+ def k3_cycle(self):
224
+ """ tr(A ** 3). """
225
+ c3 = batch_diagonal(self.k3_matrix)
226
+ return (c3 / 2).unsqueeze(-1).float(), (torch.sum(c3, dim=-1) / 6).unsqueeze(-1).float()
227
+
228
+ def k4_cycle(self):
229
+ diag_a4 = batch_diagonal(self.k4_matrix)
230
+ c4 = diag_a4 - self.d * (self.d - 1) - (self.adj_matrix @ self.d.unsqueeze(-1)).sum(dim=-1)
231
+ return (c4 / 2).unsqueeze(-1).float(), (torch.sum(c4, dim=-1) / 8).unsqueeze(-1).float()
232
+
233
+ def k5_cycle(self):
234
+ diag_a5 = batch_diagonal(self.k5_matrix)
235
+ triangles = batch_diagonal(self.k3_matrix)
236
+ c5 = diag_a5 - 2 * triangles * self.d - (self.adj_matrix @ triangles.unsqueeze(-1)).sum(dim=-1) + triangles
237
+ return (c5 / 2).unsqueeze(-1).float(), (c5.sum(dim=-1) / 10).unsqueeze(-1).float()
238
+
239
+ def k6_cycle(self):
240
+ term_1_t = batch_trace(self.k6_matrix)
241
+ term_2_t = batch_trace(self.k3_matrix ** 2)
242
+ term3_t = torch.sum(self.adj_matrix * self.k2_matrix.pow(2), dim=[-2, -1])
243
+ d_t4 = batch_diagonal(self.k2_matrix)
244
+ a_4_t = batch_diagonal(self.k4_matrix)
245
+ term_4_t = (d_t4 * a_4_t).sum(dim=-1)
246
+ term_5_t = batch_trace(self.k4_matrix)
247
+ term_6_t = batch_trace(self.k3_matrix)
248
+ term_7_t = batch_diagonal(self.k2_matrix).pow(3).sum(-1)
249
+ term8_t = torch.sum(self.k3_matrix, dim=[-2, -1])
250
+ term9_t = batch_diagonal(self.k2_matrix).pow(2).sum(-1)
251
+ term10_t = batch_trace(self.k2_matrix)
252
+
253
+ c6_t = (term_1_t - 3 * term_2_t + 9 * term3_t - 6 * term_4_t + 6 * term_5_t - 4 * term_6_t + 4 * term_7_t +
254
+ 3 * term8_t - 12 * term9_t + 4 * term10_t)
255
+ return None, (c6_t / 12).unsqueeze(-1).float()
256
+
257
+ def k_cycles(self, adj_matrix, verbose=False):
258
+ self.adj_matrix = adj_matrix
259
+ self.calculate_kpowers()
260
+
261
+ k3x, k3y = self.k3_cycle()
262
+ assert (k3x >= -0.1).all()
263
+
264
+ k4x, k4y = self.k4_cycle()
265
+ assert (k4x >= -0.1).all()
266
+
267
+ k5x, k5y = self.k5_cycle()
268
+ assert (k5x >= -0.1).all(), k5x
269
+
270
+ _, k6y = self.k6_cycle()
271
+ assert (k6y >= -0.1).all()
272
+
273
+ kcyclesx = torch.cat([k3x, k4x, k5x], dim=-1)
274
+ kcyclesy = torch.cat([k3y, k4y, k5y, k6y], dim=-1)
275
+ return kcyclesx, kcyclesy
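A usage sketch for the extra-feature computation (not part of the commit); the batch below contains two empty 9-node graphs, so all cycle counts come out as zero.

import torch
from extra_features import ExtraFeatures

bs, n, de = 2, 9, 5
E_t = torch.nn.functional.one_hot(torch.zeros(bs, n, n, dtype=torch.long), de).float()  # all "no edge"
noisy_data = {'E_t': E_t, 'node_mask': torch.ones(bs, n, dtype=torch.bool)}

feats = ExtraFeatures('cycles', max_n_nodes=9)
extra = feats(noisy_data)   # PlaceHolder with X: (bs, n, 3) node cycle counts, y: (bs, 5)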
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (163 Bytes). View file
 
models/__pycache__/layers.cpython-39.pyc ADDED
Binary file (1.91 kB). View file
 
models/__pycache__/transformer_model.cpython-39.pyc ADDED
Binary file (7.99 kB). View file
 
models/layers.py ADDED
@@ -0,0 +1,46 @@
+ import torch
+ import torch.nn as nn
+
+
+ class Xtoy(nn.Module):
+     def __init__(self, dx, dy):
+         """ Map node features to global features """
+         super().__init__()
+         self.lin = nn.Linear(4 * dx, dy)
+
+     def forward(self, X):
+         """ X: bs, n, dx. """
+         m = X.mean(dim=1)
+         mi = X.min(dim=1)[0]
+         ma = X.max(dim=1)[0]
+         std = X.std(dim=1)
+         z = torch.hstack((m, mi, ma, std))
+         out = self.lin(z)
+         return out
+
+
+ class Etoy(nn.Module):
+     def __init__(self, d, dy):
+         """ Map edge features to global features. """
+         super().__init__()
+         self.lin = nn.Linear(4 * d, dy)
+
+     def forward(self, E):
+         """ E: bs, n, n, de
+             Features relative to the diagonal of E could potentially be added.
+         """
+         m = E.mean(dim=(1, 2))
+         mi = E.min(dim=2)[0].min(dim=1)[0]
+         ma = E.max(dim=2)[0].max(dim=1)[0]
+         std = torch.std(E, dim=(1, 2))
+         z = torch.hstack((m, mi, ma, std))
+         out = self.lin(z)
+         return out
+
+
+ def masked_softmax(x, mask, **kwargs):
+     if mask.sum() == 0:
+         return x
+     x_masked = x.clone()
+     x_masked[mask == 0] = -float("inf")
+     return torch.softmax(x_masked, **kwargs)
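A small check of masked_softmax (not part of the commit): the masked position receives zero attention weight.

import torch
from models.layers import masked_softmax

scores = torch.tensor([[2.0, 1.0, 0.5]])
mask = torch.tensor([[1, 1, 0]])                # third position is padding
attn = masked_softmax(scores, mask, dim=-1)
print(attn)                                     # tensor([[0.7311, 0.2689, 0.0000]])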
models/transformer_model.py ADDED
@@ -0,0 +1,285 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn.modules.dropout import Dropout
6
+ from torch.nn.modules.linear import Linear
7
+ from torch.nn.modules.normalization import LayerNorm
8
+ from torch.nn import functional as F
9
+ from torch import Tensor
10
+
11
+ import utils
12
+ from diffusion import diffusion_utils
13
+ from models.layers import Xtoy, Etoy, masked_softmax
14
+
15
+
16
+ class XEyTransformerLayer(nn.Module):
17
+ """ Transformer that updates node, edge and global features
18
+ d_x: node features
19
+ d_e: edge features
20
+ dy: global features
21
+ n_head: the number of heads in the multi_head_attention
22
+ dim_feedforward: the dimension of the feedforward network model after self-attention
23
+ dropout: dropout probability. 0 to disable
24
+ layer_norm_eps: eps value in layer normalizations.
25
+ """
26
+ def __init__(self, dx: int, de: int, dy: int, n_head: int, dim_ffX: int = 2048,
27
+ dim_ffE: int = 128, dim_ffy: int = 2048, dropout: float = 0.1,
28
+ layer_norm_eps: float = 1e-5, device=None, dtype=None) -> None:
29
+ kw = {'device': device, 'dtype': dtype}
30
+ super().__init__()
31
+
32
+ self.self_attn = NodeEdgeBlock(dx, de, dy, n_head, **kw)
33
+
34
+ self.linX1 = Linear(dx, dim_ffX, **kw)
35
+ self.linX2 = Linear(dim_ffX, dx, **kw)
36
+ self.normX1 = LayerNorm(dx, eps=layer_norm_eps, **kw)
37
+ self.normX2 = LayerNorm(dx, eps=layer_norm_eps, **kw)
38
+ self.dropoutX1 = Dropout(dropout)
39
+ self.dropoutX2 = Dropout(dropout)
40
+ self.dropoutX3 = Dropout(dropout)
41
+
42
+ self.linE1 = Linear(de, dim_ffE, **kw)
43
+ self.linE2 = Linear(dim_ffE, de, **kw)
44
+ self.normE1 = LayerNorm(de, eps=layer_norm_eps, **kw)
45
+ self.normE2 = LayerNorm(de, eps=layer_norm_eps, **kw)
46
+ self.dropoutE1 = Dropout(dropout)
47
+ self.dropoutE2 = Dropout(dropout)
48
+ self.dropoutE3 = Dropout(dropout)
49
+
50
+ self.lin_y1 = Linear(dy, dim_ffy, **kw)
51
+ self.lin_y2 = Linear(dim_ffy, dy, **kw)
52
+ self.norm_y1 = LayerNorm(dy, eps=layer_norm_eps, **kw)
53
+ self.norm_y2 = LayerNorm(dy, eps=layer_norm_eps, **kw)
54
+ self.dropout_y1 = Dropout(dropout)
55
+ self.dropout_y2 = Dropout(dropout)
56
+ self.dropout_y3 = Dropout(dropout)
57
+
58
+ self.activation = F.relu
59
+
60
+ def forward(self, X: Tensor, E: Tensor, y, node_mask: Tensor):
61
+ """ Pass the input through the encoder layer.
62
+ X: (bs, n, d)
63
+ E: (bs, n, n, d)
64
+ y: (bs, dy)
65
+ node_mask: (bs, n) Mask for the src keys per batch (optional)
66
+ Output: newX, newE, new_y with the same shape.
67
+ """
68
+
69
+ newX, newE, new_y = self.self_attn(X, E, y, node_mask=node_mask)
70
+
71
+ newX_d = self.dropoutX1(newX)
72
+ X = self.normX1(X + newX_d)
73
+
74
+ newE_d = self.dropoutE1(newE)
75
+ E = self.normE1(E + newE_d)
76
+
77
+ new_y_d = self.dropout_y1(new_y)
78
+ y = self.norm_y1(y + new_y_d)
79
+
80
+ ff_outputX = self.linX2(self.dropoutX2(self.activation(self.linX1(X))))
81
+ ff_outputX = self.dropoutX3(ff_outputX)
82
+ X = self.normX2(X + ff_outputX)
83
+
84
+ ff_outputE = self.linE2(self.dropoutE2(self.activation(self.linE1(E))))
85
+ ff_outputE = self.dropoutE3(ff_outputE)
86
+ E = self.normE2(E + ff_outputE)
87
+
88
+ ff_output_y = self.lin_y2(self.dropout_y2(self.activation(self.lin_y1(y))))
89
+ ff_output_y = self.dropout_y3(ff_output_y)
90
+ y = self.norm_y2(y + ff_output_y)
91
+
92
+ return X, E, y
93
+
94
+
95
+ class NodeEdgeBlock(nn.Module):
96
+ """ Self attention layer that also updates the representations on the edges. """
97
+ def __init__(self, dx, de, dy, n_head, **kwargs):
98
+ super().__init__()
99
+ assert dx % n_head == 0, f"dx: {dx} -- nhead: {n_head}"
100
+ self.dx = dx
101
+ self.de = de
102
+ self.dy = dy
103
+ self.df = int(dx / n_head)
104
+ self.n_head = n_head
105
+
106
+ # Attention
107
+ self.q = Linear(dx, dx)
108
+ self.k = Linear(dx, dx)
109
+ self.v = Linear(dx, dx)
110
+
111
+ # FiLM E to X
112
+ self.e_add = Linear(de, dx)
113
+ self.e_mul = Linear(de, dx)
114
+
115
+ # FiLM y to E
116
+ self.y_e_mul = Linear(dy, dx) # Warning: here it's dx and not de
117
+ self.y_e_add = Linear(dy, dx)
118
+
119
+ # FiLM y to X
120
+ self.y_x_mul = Linear(dy, dx)
121
+ self.y_x_add = Linear(dy, dx)
122
+
123
+ # Process y
124
+ self.y_y = Linear(dy, dy)
125
+ self.x_y = Xtoy(dx, dy)
126
+ self.e_y = Etoy(de, dy)
127
+
128
+ # Output layers
129
+ self.x_out = Linear(dx, dx)
130
+ self.e_out = Linear(dx, de)
131
+ self.y_out = nn.Sequential(nn.Linear(dy, dy), nn.ReLU(), nn.Linear(dy, dy))
132
+
133
+ def forward(self, X, E, y, node_mask):
134
+ """
135
+ :param X: bs, n, d node features
136
+ :param E: bs, n, n, d edge features
137
+ :param y: bs, dz global features
138
+ :param node_mask: bs, n
139
+ :return: newX, newE, new_y with the same shape.
140
+ """
141
+ bs, n, _ = X.shape
142
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
143
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
144
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
145
+
146
+ # 1. Map X to keys and queries
147
+ Q = self.q(X) * x_mask # (bs, n, dx)
148
+ K = self.k(X) * x_mask # (bs, n, dx)
149
+ diffusion_utils.assert_correctly_masked(Q, x_mask)
150
+ # 2. Reshape to (bs, n, n_head, df) with dx = n_head * df
151
+
152
+ Q = Q.reshape((Q.size(0), Q.size(1), self.n_head, self.df))
153
+ K = K.reshape((K.size(0), K.size(1), self.n_head, self.df))
154
+
155
+ Q = Q.unsqueeze(2) # (bs, n, 1, n_head, df)
156
+ K = K.unsqueeze(1) # (bs, 1, n, n_head, df)
157
+
158
+ # Compute unnormalized attentions. Y is (bs, n, n, n_head, df)
159
+ Y = Q * K
160
+ Y = Y / math.sqrt(Y.size(-1))
161
+ diffusion_utils.assert_correctly_masked(Y, (e_mask1 * e_mask2).unsqueeze(-1))
162
+
163
+ E1 = self.e_mul(E) * e_mask1 * e_mask2 # bs, n, n, dx
164
+ E1 = E1.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df))
165
+
166
+ E2 = self.e_add(E) * e_mask1 * e_mask2 # bs, n, n, dx
167
+ E2 = E2.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df))
168
+
169
+ # Incorporate edge features to the self attention scores.
170
+ Y = Y * (E1 + 1) + E2 # (bs, n, n, n_head, df)
171
+
172
+ # Incorporate y to E
173
+ newE = Y.flatten(start_dim=3) # bs, n, n, dx
174
+ ye1 = self.y_e_add(y).unsqueeze(1).unsqueeze(1) # bs, 1, 1, de
175
+ ye2 = self.y_e_mul(y).unsqueeze(1).unsqueeze(1)
176
+ newE = ye1 + (ye2 + 1) * newE
177
+
178
+ # Output E
179
+ newE = self.e_out(newE) * e_mask1 * e_mask2 # bs, n, n, de
180
+ diffusion_utils.assert_correctly_masked(newE, e_mask1 * e_mask2)
181
+
182
+ # Compute attentions. attn is still (bs, n, n, n_head, df)
183
+ softmax_mask = e_mask2.expand(-1, n, -1, self.n_head) # bs, n, n, n_head
184
+ attn = masked_softmax(Y, softmax_mask, dim=2) # bs, n, n, n_head
185
+
186
+ V = self.v(X) * x_mask # bs, n, dx
187
+ V = V.reshape((V.size(0), V.size(1), self.n_head, self.df))
188
+ V = V.unsqueeze(1) # (bs, 1, n, n_head, df)
189
+
190
+ # Compute weighted values
191
+ weighted_V = attn * V
192
+ weighted_V = weighted_V.sum(dim=2)
193
+
194
+ # Send output to input dim
195
+ weighted_V = weighted_V.flatten(start_dim=2) # bs, n, dx
196
+
197
+ # Incorporate y to X
198
+ yx1 = self.y_x_add(y).unsqueeze(1)
199
+ yx2 = self.y_x_mul(y).unsqueeze(1)
200
+ newX = yx1 + (yx2 + 1) * weighted_V
201
+
202
+ # Output X
203
+ newX = self.x_out(newX) * x_mask
204
+ diffusion_utils.assert_correctly_masked(newX, x_mask)
205
+
206
+ # Process y based on X and E
207
+ y = self.y_y(y)
208
+ e_y = self.e_y(E)
209
+ x_y = self.x_y(X)
210
+ new_y = y + x_y + e_y
211
+ new_y = self.y_out(new_y) # bs, dy
212
+
213
+ return newX, newE, new_y
214
+
215
+
216
+ class GraphTransformer(nn.Module):
217
+ """
218
+ n_layers : int -- number of layers
219
+ dims : dict -- contains dimensions for each feature type
220
+ """
221
+ def __init__(self, n_layers: int, input_dims: dict, cond_dims: int, hidden_mlp_dims: dict, hidden_dims: dict,
222
+ output_dims: dict, act_fn_in: nn.ReLU(), act_fn_out: nn.ReLU()):
223
+ super().__init__()
224
+ self.n_layers = n_layers
225
+ self.out_dim_X = output_dims['X']
226
+ self.out_dim_E = output_dims['E']
227
+ self.out_dim_y = output_dims['y']
228
+
229
+ self.mlp_in_X = nn.Sequential(nn.Linear(input_dims['X'] + cond_dims, hidden_mlp_dims['X']), act_fn_in,
230
+ nn.Linear(hidden_mlp_dims['X'], hidden_dims['dx']), act_fn_in)
231
+
232
+ self.mlp_in_E = nn.Sequential(nn.Linear(input_dims['E'] + cond_dims, hidden_mlp_dims['E']), act_fn_in,
233
+ nn.Linear(hidden_mlp_dims['E'], hidden_dims['de']), act_fn_in)
234
+
235
+ self.mlp_in_y = nn.Sequential(nn.Linear(input_dims['y'], hidden_mlp_dims['y']), act_fn_in,
236
+ nn.Linear(hidden_mlp_dims['y'], hidden_dims['dy']), act_fn_in)
237
+
238
+ self.tf_layers = nn.ModuleList([XEyTransformerLayer(dx=hidden_dims['dx'],
239
+ de=hidden_dims['de'],
240
+ dy=hidden_dims['dy'],
241
+ n_head=hidden_dims['n_head'],
242
+ dim_ffX=hidden_dims['dim_ffX'],
243
+ dim_ffE=hidden_dims['dim_ffE'])
244
+ for i in range(n_layers)])
245
+
246
+ self.mlp_out_X = nn.Sequential(nn.Linear(hidden_dims['dx'], hidden_mlp_dims['X']), act_fn_out,
247
+ nn.Linear(hidden_mlp_dims['X'], output_dims['X']))
248
+
249
+ self.mlp_out_E = nn.Sequential(nn.Linear(hidden_dims['de'], hidden_mlp_dims['E']), act_fn_out,
250
+ nn.Linear(hidden_mlp_dims['E'], output_dims['E']))
251
+
252
+ self.mlp_out_y = nn.Sequential(nn.Linear(hidden_dims['dy'], hidden_mlp_dims['y']), act_fn_out,
253
+ nn.Linear(hidden_mlp_dims['y'], output_dims['y']))
254
+
255
+ def forward(self, X, E, y, node_mask):
256
+ bs, n = X.shape[0], X.shape[1]
257
+
258
+ diag_mask = torch.eye(n)
259
+ diag_mask = ~diag_mask.type_as(E).bool()
260
+ diag_mask = diag_mask.unsqueeze(0).unsqueeze(-1).expand(bs, -1, -1, -1)
261
+
262
+ X_to_out = X[..., :self.out_dim_X]
263
+ E_to_out = E[..., :self.out_dim_E]
264
+ y_to_out = y[..., :self.out_dim_y]
265
+
266
+ new_E = self.mlp_in_E(E)
267
+ new_E = (new_E + new_E.transpose(1, 2)) / 2
268
+
269
+ after_in = utils.PlaceHolder(X=self.mlp_in_X(X), E=new_E, y=self.mlp_in_y(y)).mask(node_mask)
270
+ X, E, y = after_in.X, after_in.E, after_in.y
271
+
272
+ for layer in self.tf_layers:
273
+ X, E, y = layer(X, E, y, node_mask)
274
+
275
+ X = self.mlp_out_X(X)
276
+ E = self.mlp_out_E(E)
277
+ y = self.mlp_out_y(y)
278
+
279
+ X = (X + X_to_out)
280
+ E = (E + E_to_out) * diag_mask
281
+ y = y + y_to_out
282
+
283
+ E = 1/2 * (E + torch.transpose(E, 1, 2))
284
+
285
+ return utils.PlaceHolder(X=X, E=E, y=y).mask(node_mask)
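An instantiation sketch (not part of the commit). All dimension values are illustrative; in the demo they come from config.yaml and the dataset infos, and the conditioning channels (cond_dims) are assumed to be concatenated to X and E before the call.

import torch
import torch.nn as nn
from models.transformer_model import GraphTransformer

input_dims = {'X': 8, 'E': 5, 'y': 12}
output_dims = {'X': 8, 'E': 5, 'y': 1}
hidden_mlp_dims = {'X': 128, 'E': 64, 'y': 128}
hidden_dims = {'dx': 256, 'de': 64, 'dy': 64, 'n_head': 8, 'dim_ffX': 256, 'dim_ffE': 128}

model = GraphTransformer(n_layers=2, input_dims=input_dims, cond_dims=4,
                         hidden_mlp_dims=hidden_mlp_dims, hidden_dims=hidden_dims,
                         output_dims=output_dims, act_fn_in=nn.ReLU(), act_fn_out=nn.ReLU())

bs, n = 2, 9
X = torch.randn(bs, n, input_dims['X'] + 4)         # node features + 4 conditioning channels
E = torch.randn(bs, n, n, input_dims['E'] + 4)      # edge features + 4 conditioning channels
y = torch.randn(bs, input_dims['y'])
node_mask = torch.ones(bs, n, dtype=torch.bool)
out = model(X, E, y, node_mask)                     # PlaceHolder with denoised X, E, y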
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ networkx==2.8.7
+ numpy==1.23
+ omegaconf==2.3.0
+ pytorch_lightning==2.0.4
+ torch_geometric==2.3.1
+ torchmetrics==0.11.4
+ tqdm==4.65.0
+ torch==2.3.0
+ torchvision==0.18.0
+ torchaudio==2.3.0
+ gradio==4.32.0
+ wandb==0.15.4
+ sentence-transformers==2.6.1
+ PyGSP==0.5.1
+ pyemd==1.0.0
utils.py ADDED
@@ -0,0 +1,137 @@
1
+ import os
2
+ import torch_geometric.utils
3
+ from omegaconf import OmegaConf, open_dict
4
+ from torch_geometric.utils import to_dense_adj, to_dense_batch
5
+ import torch
6
+ import omegaconf
7
+ import wandb
8
+
9
+ def create_folders(args):
10
+ try:
11
+ # os.makedirs('checkpoints')
12
+ os.makedirs('graphs')
13
+ os.makedirs('chains')
14
+ except OSError:
15
+ pass
16
+
17
+ try:
18
+ # os.makedirs('checkpoints/' + args.general.name)
19
+ os.makedirs('graphs/' + args.general.name)
20
+ os.makedirs('chains/' + args.general.name)
21
+ except OSError:
22
+ pass
23
+
24
+
25
+ def normalize(X, E, y, norm_values, norm_biases, node_mask):
26
+ X = (X - norm_biases[0]) / norm_values[0]
27
+ E = (E - norm_biases[1]) / norm_values[1]
28
+ y = (y - norm_biases[2]) / norm_values[2]
29
+
30
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
31
+ E[diag] = 0
32
+
33
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask)
34
+
35
+
36
+ def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False):
37
+ """
38
+ X : node features
39
+ E : edge features
40
+ y : global features
41
+ norm_values : [norm value X, norm value E, norm value y]
42
+ norm_biases : same order
43
+ node_mask
44
+ """
45
+ X = (X * norm_values[0] + norm_biases[0])
46
+ E = (E * norm_values[1] + norm_biases[1])
47
+ y = y * norm_values[2] + norm_biases[2]
48
+
49
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse)
50
+
51
+
52
+ def to_dense(x, edge_index, edge_attr, batch):
53
+ X, node_mask = to_dense_batch(x=x, batch=batch)
54
+ # node_mask = node_mask.float()
55
+ edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr)
56
+ # TODO: carefully check if setting node_mask as a bool breaks the continuous case
57
+ max_num_nodes = X.size(1)
58
+ E = to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes)
59
+ E = encode_no_edge(E)
60
+
61
+ return PlaceHolder(X=X, E=E, y=None), node_mask
62
+
63
+
64
+ def encode_no_edge(E):
65
+ assert len(E.shape) == 4
66
+ if E.shape[-1] == 0:
67
+ return E
68
+ no_edge = torch.sum(E, dim=3) == 0
69
+ first_elt = E[:, :, :, 0]
70
+ first_elt[no_edge] = 1
71
+ E[:, :, :, 0] = first_elt
72
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
73
+ E[diag] = 0
74
+ return E
75
+
76
+
77
+ def update_config_with_new_keys(cfg, saved_cfg):
78
+ saved_general = saved_cfg.general
79
+ saved_train = saved_cfg.train
80
+ saved_model = saved_cfg.model
81
+
82
+ for key, val in saved_general.items():
83
+ OmegaConf.set_struct(cfg.general, True)
84
+ with open_dict(cfg.general):
85
+ if key not in cfg.general.keys():
86
+ setattr(cfg.general, key, val)
87
+
88
+ OmegaConf.set_struct(cfg.train, True)
89
+ with open_dict(cfg.train):
90
+ for key, val in saved_train.items():
91
+ if key not in cfg.train.keys():
92
+ setattr(cfg.train, key, val)
93
+
94
+ OmegaConf.set_struct(cfg.model, True)
95
+ with open_dict(cfg.model):
96
+ for key, val in saved_model.items():
97
+ if key not in cfg.model.keys():
98
+ setattr(cfg.model, key, val)
99
+ return cfg
100
+
101
+
102
+ class PlaceHolder:
103
+ def __init__(self, X, E, y):
104
+ self.X = X
105
+ self.E = E
106
+ self.y = y
107
+
108
+ def type_as(self, x: torch.Tensor):
109
+ """ Changes the device and dtype of X, E, y. """
110
+ self.X = self.X.type_as(x)
111
+ self.E = self.E.type_as(x)
112
+ self.y = self.y.type_as(x)
113
+ return self
114
+
115
+ def mask(self, node_mask, collapse=False):
116
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
117
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
118
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
119
+
120
+ if collapse:
121
+ self.X = torch.argmax(self.X, dim=-1)
122
+ self.E = torch.argmax(self.E, dim=-1)
123
+
124
+ self.X[node_mask == 0] = - 1
125
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
126
+ else:
127
+ self.X = self.X * x_mask
128
+ self.E = self.E * e_mask1 * e_mask2
129
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
130
+ return self
131
+
132
+ def setup_wandb(cfg):
133
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
134
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
135
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
136
+ wandb.init(**kwargs)
137
+ wandb.save('*.txt')
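A sketch of PlaceHolder.mask with collapse=True (not part of the commit), showing how padded nodes and edges are labelled -1 after the per-class probabilities are collapsed to integer labels.

import torch
from utils import PlaceHolder

bs, n = 1, 3
X = torch.softmax(torch.randn(bs, n, 4), dim=-1)        # per-node class probabilities
E = torch.softmax(torch.randn(bs, n, n, 2), dim=-1)     # per-edge class probabilities
E = (E + E.transpose(1, 2)) / 2                         # keep the edge tensor symmetric
node_mask = torch.tensor([[True, True, False]])         # last node is padding

dense = PlaceHolder(X=X, E=E, y=torch.zeros(bs, 0)).mask(node_mask, collapse=True)
print(dense.X)   # integer node labels, padded node set to -1
print(dense.E)   # integer edge labels, padded rows / columns set to -1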