YuWang0103 committed on
Commit 6b59850 · verified · 1 Parent(s): 613b01f

Upload 41 files

Files changed (41)
  1. analysis/.DS_Store +0 -0
  2. analysis/__init__.py +0 -0
  3. analysis/__pycache__/__init__.cpython-39 (nds4's conflicted copy 2024-05-30).pyc +0 -0
  4. analysis/__pycache__/__init__.cpython-39.pyc +0 -0
  5. analysis/__pycache__/dist_helper.cpython-39.pyc +0 -0
  6. analysis/__pycache__/spectre_utils.cpython-39 (nds4's conflicted copy 2024-05-30).pyc +0 -0
  7. analysis/__pycache__/spectre_utils.cpython-39.pyc +0 -0
  8. analysis/dist_helper.py +156 -0
  9. analysis/orca/orca +0 -0
  10. analysis/orca/orca.cpp +1532 -0
  11. analysis/orca/orca.h +1488 -0
  12. analysis/orca/tmp_JJOX0U87.txt +25 -0
  13. analysis/orca/tmp_YX4O2JRL.txt +3269 -0
  14. analysis/rdkit_functions.py +334 -0
  15. analysis/spectre_utils.py +928 -0
  16. analysis/visualization.py +221 -0
  17. app.py +89 -0
  18. config.yaml +53 -0
  19. dataset.py +395 -0
  20. demo_model.py +214 -0
  21. diffusion/__init__.py +0 -0
  22. diffusion/__pycache__/__init__.cpython-39.pyc +0 -0
  23. diffusion/__pycache__/diffusion_utils.cpython-39.pyc +0 -0
  24. diffusion/__pycache__/noise_schedule.cpython-39.pyc +0 -0
  25. diffusion/diffusion_utils.py +437 -0
  26. diffusion/distributions.py +32 -0
  27. diffusion/extra_features.py +275 -0
  28. diffusion/extra_features_molecular.py +57 -0
  29. diffusion/layers.py +19 -0
  30. diffusion/noise_schedule.py +225 -0
  31. diffusion/utils.py +137 -0
  32. distributions.py +37 -0
  33. extra_features.py +275 -0
  34. models/__init__.py +0 -0
  35. models/__pycache__/__init__.cpython-39.pyc +0 -0
  36. models/__pycache__/layers.cpython-39.pyc +0 -0
  37. models/__pycache__/transformer_model.cpython-39.pyc +0 -0
  38. models/layers.py +46 -0
  39. models/transformer_model.py +285 -0
  40. requirements.txt +15 -0
  41. utils.py +137 -0
analysis/.DS_Store ADDED
Binary file (6.15 kB).

analysis/__init__.py ADDED
File without changes

analysis/__pycache__/__init__.cpython-39 (nds4's conflicted copy 2024-05-30).pyc ADDED
Binary file (149 Bytes).

analysis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (165 Bytes).

analysis/__pycache__/dist_helper.cpython-39.pyc ADDED
Binary file (4.57 kB).

analysis/__pycache__/spectre_utils.cpython-39 (nds4's conflicted copy 2024-05-30).pyc ADDED
Binary file (23.5 kB).

analysis/__pycache__/spectre_utils.cpython-39.pyc ADDED
Binary file (23.5 kB).
analysis/dist_helper.py ADDED
@@ -0,0 +1,156 @@
+ ###############################################################################
+ #
+ # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation
+ #
+ ###############################################################################
+ import pyemd
+ import numpy as np
+ import concurrent.futures
+ from functools import partial
+ from scipy.linalg import toeplitz
+
+
+ def emd(x, y, distance_scaling=1.0):
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     emd = pyemd.emd(x, y, distance_mat)
+     return emd
+
+
+
+ def l2(x, y):
+     dist = np.linalg.norm(x - y, 2)
+     return dist
+
+
+ def emd(x, y, sigma=1.0, distance_scaling=1.0):
+     ''' EMD
+     Args:
+         x, y: 1D pmf of two distributions with the same support
+         sigma: standard deviation
+     '''
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     return np.abs(pyemd.emd(x, y, distance_mat))
+
+
+ def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0):
+     ''' Gaussian kernel with squared distance in exponential term replaced by EMD
+     Args:
+         x, y: 1D pmf of two distributions with the same support
+         sigma: standard deviation
+     '''
+     support_size = max(len(x), len(y))
+     d_mat = toeplitz(range(support_size)).astype(float)
+     distance_mat = d_mat / distance_scaling
+
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     emd = pyemd.emd(x, y, distance_mat)
+     return np.exp(-emd * emd / (2 * sigma * sigma))
+
+
+ def gaussian(x, y, sigma=1.0):
+     support_size = max(len(x), len(y))
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     dist = np.linalg.norm(x - y, 2)
+     return np.exp(-dist * dist / (2 * sigma * sigma))
+
+
+ def gaussian_tv(x, y, sigma=1.0):
+     support_size = max(len(x), len(y))
+     # convert histogram values x and y to float, and make them equal len
+     x = x.astype(float)
+     y = y.astype(float)
+     if len(x) < len(y):
+         x = np.hstack((x, [0.0] * (support_size - len(x))))
+     elif len(y) < len(x):
+         y = np.hstack((y, [0.0] * (support_size - len(y))))
+
+     dist = np.abs(x - y).sum() / 2.0
+     return np.exp(-dist * dist / (2 * sigma * sigma))
+
+
+ def kernel_parallel_unpacked(x, samples2, kernel):
+     d = 0
+     for s2 in samples2:
+         d += kernel(x, s2)
+     return d
+
+
+ def kernel_parallel_worker(t):
+     return kernel_parallel_unpacked(*t)
+
+
+ def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs):
+     ''' Discrepancy between 2 samples '''
+     d = 0
+
+     if not is_parallel:
+         for s1 in samples1:
+             for s2 in samples2:
+                 d += kernel(s1, s2, *args, **kwargs)
+     else:
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             for dist in executor.map(kernel_parallel_worker, [
+                     (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1
+             ]):
+                 d += dist
+     if len(samples1) * len(samples2) > 0:
+         d /= len(samples1) * len(samples2)
+     else:
+         d = 1e+6
+     return d
+
+
+ def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
+     ''' MMD between two samples '''
+     # normalize histograms into pmf
+     if is_hist:
+         samples1 = [s1 / (np.sum(s1) + 1e-6) for s1 in samples1]
+         samples2 = [s2 / (np.sum(s2) + 1e-6) for s2 in samples2]
+     return disc(samples1, samples1, kernel, *args, **kwargs) + disc(samples2, samples2, kernel, *args, **kwargs) - \
+         2 * disc(samples1, samples2, kernel, *args, **kwargs)
+
+
+ def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
+     ''' EMD between average of two samples '''
+     # normalize histograms into pmf
+     if is_hist:
+         samples1 = [np.mean(samples1)]
+         samples2 = [np.mean(samples2)]
+     return disc(samples1, samples2, kernel, *args,
+                 **kwargs), [samples1[0], samples2[0]]
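For context, compute_mmd takes two lists of 1-D histograms and one of the kernels defined above, normalizes each histogram into a pmf (is_hist=True), and returns the squared MMD. A minimal usage sketch follows; it is illustrative only — the example histograms are made up, the repo-root import path is an assumption, and it presumes the dependencies in requirements.txt (e.g. pyemd, scipy) are installed:

import numpy as np
from analysis.dist_helper import compute_mmd, gaussian_tv

# Hypothetical degree histograms for a reference set and a generated set of graphs.
ref_hists = [np.array([1, 3, 2, 0]), np.array([2, 2, 1, 1])]
gen_hists = [np.array([0, 4, 1, 1]), np.array([1, 2, 2, 1])]

# sigma is forwarded through disc() to the total-variation Gaussian kernel.
score = compute_mmd(ref_hists, gen_hists, kernel=gaussian_tv, is_hist=True, sigma=1.0)
print(score)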
analysis/orca/orca ADDED
Binary file (95.5 kB).
 
analysis/orca/orca.cpp ADDED
@@ -0,0 +1,1532 @@
1
+ #include <cstdio>
2
+ #include <cstdlib>
3
+ #include <cstring>
4
+ #include <cassert>
5
+ #include <ctime>
6
+ #include <iostream>
7
+ #include <fstream>
8
+ #include <set>
9
+ #include <sstream>
10
+ #include <unordered_map>
11
+ #include <algorithm>
12
+
13
+ using namespace std;
14
+
15
+ typedef long long int64;
16
+ typedef pair<int,int> PII;
17
+ typedef struct { int first, second, third; } TIII;
18
+
19
+ struct PAIR {
20
+ int a, b;
21
+ PAIR(int a0, int b0) { a=min(a0,b0); b=max(a0,b0); }
22
+ };
23
+ bool operator<(const PAIR &x, const PAIR &y) {
24
+ if (x.a==y.a) return x.b<y.b;
25
+ else return x.a<y.a;
26
+ }
27
+ bool operator==(const PAIR &x, const PAIR &y) {
28
+ return x.a==y.a && x.b==y.b;
29
+ }
30
+ struct hash_PAIR {
31
+ size_t operator()(const PAIR &x) const {
32
+ return (x.a<<8) ^ (x.b<<0);
33
+ }
34
+ };
35
+
36
+ struct TRIPLE {
37
+ int a, b, c;
38
+ TRIPLE(int a0, int b0, int c0) {
39
+ a=a0; b=b0; c=c0;
40
+ if (a>b) swap(a,b);
41
+ if (b>c) swap(b,c);
42
+ if (a>b) swap(a,b);
43
+ }
44
+ };
45
+ bool operator<(const TRIPLE &x, const TRIPLE &y) {
46
+ if (x.a==y.a) {
47
+ if (x.b==y.b) return x.c<y.c;
48
+ else return x.b<y.b;
49
+ } else return x.a<y.a;
50
+ }
51
+ bool operator==(const TRIPLE &x, const TRIPLE &y) {
52
+ return x.a==y.a && x.b==y.b && x.c==y.c;
53
+ }
54
+ struct hash_TRIPLE {
55
+ size_t operator()(const TRIPLE &x) const {
56
+ return (x.a<<16) ^ (x.b<<8) ^ (x.c<<0);
57
+ }
58
+ };
59
+
60
+ unordered_map<PAIR, int, hash_PAIR> common2;
61
+ unordered_map<TRIPLE, int, hash_TRIPLE> common3;
62
+ unordered_map<PAIR, int, hash_PAIR>::iterator common2_it;
63
+ unordered_map<TRIPLE, int, hash_TRIPLE>::iterator common3_it;
64
+
65
+ #define common3_get(x) (((common3_it=common3.find(x))!=common3.end())?(common3_it->second):0)
66
+ #define common2_get(x) (((common2_it=common2.find(x))!=common2.end())?(common2_it->second):0)
67
+
68
+ int n,m; // n = number of nodes, m = number of edges
69
+ int *deg; // degrees of individual nodes
70
+ PAIR *edges; // list of edges
71
+
72
+ int **adj; // adj[x] - adjacency list of node x
73
+ PII **inc; // inc[x] - incidence list of node x: (y, edge id)
74
+ bool adjacent_list(int x, int y) { return binary_search(adj[x],adj[x]+deg[x],y); }
75
+ int *adj_matrix; // compressed adjacency matrix
76
+ const int adj_chunk = 8*sizeof(int);
77
+ bool adjacent_matrix(int x, int y) { return adj_matrix[(x*n+y)/adj_chunk]&(1<<((x*n+y)%adj_chunk)); }
78
+ bool (*adjacent)(int,int);
79
+ int getEdgeId(int x, int y) { return inc[x][lower_bound(adj[x],adj[x]+deg[x],y)-adj[x]].second; }
80
+
81
+ int64 **orbit; // orbit[x][o] - how many times does node x participate in orbit o
82
+ int64 **eorbit; // eorbit[x][o] - how many times does node x participate in edge orbit o
83
+
84
+ /** count graphlets on max 4 nodes */
85
+ void count4() {
86
+ clock_t startTime, endTime;
87
+ startTime = clock();
88
+ clock_t startTime_all, endTime_all;
89
+ startTime_all = startTime;
90
+ int frac,frac_prev;
91
+
92
+ // precompute triangles that span over edges
93
+ printf("stage 1 - precomputing common nodes\n");
94
+ int *tri = (int*)calloc(m,sizeof(int));
95
+ frac_prev=-1;
96
+ for (int i=0;i<m;i++) {
97
+ frac = 100LL*i/m;
98
+ if (frac!=frac_prev) {
99
+ printf("%d%%\r",frac);
100
+ frac_prev=frac;
101
+ }
102
+ int x=edges[i].a, y=edges[i].b;
103
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
104
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
105
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
106
+ else { yi++; }
107
+ }
108
+ }
109
+ endTime = clock();
110
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
111
+ startTime = endTime;
112
+
113
+ // count full graphlets
114
+ printf("stage 2 - counting full graphlets\n");
115
+ int64 *C4 = (int64*)calloc(n,sizeof(int64));
116
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
117
+ frac_prev=-1;
118
+ for (int x=0;x<n;x++) {
119
+ frac = 100LL*x/n;
120
+ if (frac!=frac_prev) {
121
+ printf("%d%%\r",frac);
122
+ frac_prev=frac;
123
+ }
124
+ for (int nx=0;nx<deg[x];nx++) {
125
+ int y=adj[x][nx];
126
+ if (y >= x) break;
127
+ nn=0;
128
+ for (int ny=0;ny<deg[y];ny++) {
129
+ int z=adj[y][ny];
130
+ if (z >= y) break;
131
+ if (adjacent(x,z)==0) continue;
132
+ neigh[nn++]=z;
133
+ }
134
+ for (int i=0;i<nn;i++) {
135
+ int z = neigh[i];
136
+ for (int j=i+1;j<nn;j++) {
137
+ int zz = neigh[j];
138
+ if (adjacent(z,zz)) {
139
+ C4[x]++; C4[y]++; C4[z]++; C4[zz]++;
140
+ }
141
+ }
142
+ }
143
+ }
144
+ }
145
+ endTime = clock();
146
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
147
+ startTime = endTime;
148
+
149
+ // set up a system of equations relating orbits for every node
150
+ printf("stage 3 - building systems of equations\n");
151
+ int *common = (int*)calloc(n,sizeof(int));
152
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
153
+ frac_prev=-1;
154
+ for (int x=0;x<n;x++) {
155
+ frac = 100LL*x/n;
156
+ if (frac!=frac_prev) {
157
+ printf("%d%%\r",frac);
158
+ frac_prev=frac;
159
+ }
160
+
161
+ int64 f_12_14=0, f_10_13=0;
162
+ int64 f_13_14=0, f_11_13=0;
163
+ int64 f_7_11=0, f_5_8=0;
164
+ int64 f_6_9=0, f_9_12=0, f_4_8=0, f_8_12=0;
165
+ int64 f_14=C4[x];
166
+
167
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
168
+ nc=0;
169
+
170
+ orbit[x][0]=deg[x];
171
+ // x - middle node
172
+ for (int nx1=0;nx1<deg[x];nx1++) {
173
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
174
+ for (int ny=0;ny<deg[y];ny++) {
175
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
176
+ if (adjacent(x,z)) { // triangle
177
+ if (z<y) {
178
+ f_12_14 += tri[ez]-1;
179
+ f_10_13 += (deg[y]-1-tri[ez])+(deg[z]-1-tri[ez]);
180
+ }
181
+ } else {
182
+ if (common[z]==0) common_list[nc++]=z;
183
+ common[z]++;
184
+ }
185
+ }
186
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
187
+ int z=inc[x][nx2].first, ez=inc[x][nx2].second;
188
+ if (adjacent(y,z)) { // triangle
189
+ orbit[x][3]++;
190
+ f_13_14 += (tri[ey]-1)+(tri[ez]-1);
191
+ f_11_13 += (deg[x]-1-tri[ey])+(deg[x]-1-tri[ez]);
192
+ } else { // path
193
+ orbit[x][2]++;
194
+ f_7_11 += (deg[x]-1-tri[ey]-1)+(deg[x]-1-tri[ez]-1);
195
+ f_5_8 += (deg[y]-1-tri[ey])+(deg[z]-1-tri[ez]);
196
+ }
197
+ }
198
+ }
199
+ // x - side node
200
+ for (int nx1=0;nx1<deg[x];nx1++) {
201
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
202
+ for (int ny=0;ny<deg[y];ny++) {
203
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
204
+ if (x==z) continue;
205
+ if (!adjacent(x,z)) { // path
206
+ orbit[x][1]++;
207
+ f_6_9 += (deg[y]-1-tri[ey]-1);
208
+ f_9_12 += tri[ez];
209
+ f_4_8 += (deg[z]-1-tri[ez]);
210
+ f_8_12 += (common[z]-1);
211
+ }
212
+ }
213
+ }
214
+
215
+ // solve system of equations
216
+ orbit[x][14]=(f_14);
217
+ orbit[x][13]=(f_13_14-6*f_14)/2;
218
+ orbit[x][12]=(f_12_14-3*f_14);
219
+ orbit[x][11]=(f_11_13-f_13_14+6*f_14)/2;
220
+ orbit[x][10]=(f_10_13-f_13_14+6*f_14);
221
+ orbit[x][9]=(f_9_12-2*f_12_14+6*f_14)/2;
222
+ orbit[x][8]=(f_8_12-2*f_12_14+6*f_14)/2;
223
+ orbit[x][7]=(f_13_14+f_7_11-f_11_13-6*f_14)/6;
224
+ orbit[x][6]=(2*f_12_14+f_6_9-f_9_12-6*f_14)/2;
225
+ orbit[x][5]=(2*f_12_14+f_5_8-f_8_12-6*f_14);
226
+ orbit[x][4]=(2*f_12_14+f_4_8-f_8_12-6*f_14);
227
+ }
228
+
229
+ endTime = clock();
230
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
231
+
232
+ endTime_all = endTime;
233
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
234
+ }
235
+
236
+
237
+ /** count edge orbits of graphlets on max 4 nodes */
238
+ void ecount4() {
239
+ clock_t startTime, endTime;
240
+ startTime = clock();
241
+ clock_t startTime_all, endTime_all;
242
+ startTime_all = startTime;
243
+ int frac,frac_prev;
244
+
245
+ // precompute triangles that span over edges
246
+ printf("stage 1 - precomputing common nodes\n");
247
+ int *tri = (int*)calloc(m,sizeof(int));
248
+ frac_prev=-1;
249
+ for (int i=0;i<m;i++) {
250
+ frac = 100LL*i/m;
251
+ if (frac!=frac_prev) {
252
+ printf("%d%%\r",frac);
253
+ frac_prev=frac;
254
+ }
255
+ int x=edges[i].a, y=edges[i].b;
256
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
257
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
258
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
259
+ else { yi++; }
260
+ }
261
+ }
262
+ endTime = clock();
263
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
264
+ startTime = endTime;
265
+
266
+ // count full graphlets
267
+ printf("stage 2 - counting full graphlets\n");
268
+ int64 *C4 = (int64*)calloc(m,sizeof(int64));
269
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
270
+ memset(neighx,-1,n*sizeof(int));
271
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
272
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
273
+ frac_prev=-1;
274
+ for (int x=0;x<n;x++) {
275
+ frac = 100LL*x/n;
276
+ if (frac!=frac_prev) {
277
+ printf("%d%%\r",frac);
278
+ frac_prev=frac;
279
+ }
280
+
281
+ for (int nx=0;nx<deg[x];nx++) {
282
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
283
+ neighx[y]=xy;
284
+ }
285
+ for (int nx=0;nx<deg[x];nx++) {
286
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
287
+ if (y >= x) break;
288
+ nn=0;
289
+ for (int ny=0;ny<deg[y];ny++) {
290
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
291
+ if (z >= y) break;
292
+ if (neighx[z]==-1) continue;
293
+ int xz=neighx[z];
294
+ neigh[nn]=z;
295
+ neigh_edges[nn]={xz, yz};
296
+ nn++;
297
+ }
298
+ for (int i=0;i<nn;i++) {
299
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
300
+ for (int j=i+1;j<nn;j++) {
301
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
302
+ if (adjacent(z,w)) {
303
+ C4[xy]++;
304
+ C4[xz]++; C4[yz]++;
305
+ C4[xw]++; C4[yw]++;
306
+ // another iteration to count this last(smallest) edge instead of calling getEdgeId
307
+ //int zw=getEdgeId(z,w); C4[zw]++;
308
+ }
309
+ }
310
+ }
311
+ }
312
+ for (int nx=0;nx<deg[x];nx++) {
313
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
314
+ neighx[y]=-1;
315
+ }
316
+ }
317
+ endTime = clock();
318
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
319
+ startTime = endTime;
320
+
321
+ // count full graphlets for the smallest edge
322
+ for (int x=0;x<n;x++) {
323
+ frac = 100LL*x/n;
324
+ if (frac!=frac_prev) {
325
+ printf("%d%%\r",frac);
326
+ frac_prev=frac;
327
+ }
328
+ for (int nx=deg[x]-1;nx>=0;nx--) {
329
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
330
+ if (y <= x) break;
331
+ nn=0;
332
+ for (int ny=deg[y]-1;ny>=0;ny--) {
333
+ int z=adj[y][ny];
334
+ if (z <= y) break;
335
+ if (adjacent(x,z)==0) continue;
336
+ neigh[nn++]=z;
337
+ }
338
+ for (int i=0;i<nn;i++) {
339
+ int z = neigh[i];
340
+ for (int j=i+1;j<nn;j++) {
341
+ int zz = neigh[j];
342
+ if (adjacent(z,zz)) {
343
+ C4[xy]++;
344
+ }
345
+ }
346
+ }
347
+ }
348
+ }
349
+ endTime = clock();
350
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
351
+ startTime = endTime;
352
+
353
+ // set up a system of equations relating orbits for every node
354
+ printf("stage 3 - building systems of equations\n");
355
+ int *common = (int*)calloc(n,sizeof(int));
356
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
357
+ frac_prev=-1;
358
+
359
+ for (int x=0;x<n;x++) {
360
+ frac = 100LL*x/n;
361
+ if (frac!=frac_prev) {
362
+ printf("%d%%\r",frac);
363
+ frac_prev=frac;
364
+ }
365
+
366
+ // common nodes of x and some other node
367
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
368
+ nc=0;
369
+ for (int nx=0;nx<deg[x];nx++) {
370
+ int y=adj[x][nx];
371
+ for (int ny=0;ny<deg[y];ny++) {
372
+ int z=adj[y][ny];
373
+ if (z==x) continue;
374
+ if (common[z]==0) common_list[nc++]=z;
375
+ common[z]++;
376
+ }
377
+ }
378
+
379
+ for (int nx=0;nx<deg[x];nx++) {
380
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
381
+ int e=xy;
382
+ for (int n1=0;n1<deg[x];n1++) {
383
+ int z=inc[x][n1].first, xz=inc[x][n1].second;
384
+ if (z==y) continue;
385
+ if (adjacent(y,z)) { // triangle
386
+ if (x<y) {
387
+ eorbit[e][1]++;
388
+ eorbit[e][10] += tri[xy]-1;
389
+ eorbit[e][7] += deg[z]-2;
390
+ }
391
+ eorbit[e][9] += tri[xz]-1;
392
+ eorbit[e][8] += deg[x]-2;
393
+ }
394
+ }
395
+ for (int n1=0;n1<deg[y];n1++) {
396
+ int z=inc[y][n1].first, yz=inc[y][n1].second;
397
+ if (z==x) continue;
398
+ if (!adjacent(x,z)) { // path x-y-z
399
+ eorbit[e][0]++;
400
+ eorbit[e][6] += tri[yz];
401
+ eorbit[e][5] += common[z]-1;
402
+ eorbit[e][4] += deg[y]-2;
403
+ eorbit[e][3] += deg[x]-1;
404
+ eorbit[e][2] += deg[z]-1;
405
+ }
406
+ }
407
+ }
408
+ }
409
+ // solve system of equations
410
+ for (int e=0;e<m;e++) {
411
+ eorbit[e][11]=C4[e];
412
+ eorbit[e][10]=(eorbit[e][10]-2*eorbit[e][11])/2;
413
+ eorbit[e][9]=(eorbit[e][9]-4*eorbit[e][11]);
414
+ eorbit[e][8]=(eorbit[e][8]-eorbit[e][9]-4*eorbit[e][10]-4*eorbit[e][11]);
415
+ eorbit[e][7]=(eorbit[e][7]-eorbit[e][9]-2*eorbit[e][11]);
416
+ eorbit[e][6]=(eorbit[e][6]-eorbit[e][9])/2;
417
+ eorbit[e][5]=(eorbit[e][5]-eorbit[e][9])/2;
418
+ eorbit[e][4]=(eorbit[e][4]-2*eorbit[e][6]-eorbit[e][8]-eorbit[e][9])/2;
419
+ eorbit[e][3]=(eorbit[e][3]-2*eorbit[e][5]-eorbit[e][8]-eorbit[e][9])/2;
420
+ eorbit[e][2]=(eorbit[e][2]-2*eorbit[e][5]-2*eorbit[e][6]-eorbit[e][9]);
421
+ }
422
+
423
+ endTime = clock();
424
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
425
+
426
+ endTime_all = endTime;
427
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
428
+ }
429
+
430
+
431
+ /** count graphlets on max 5 nodes */
432
+ void count5() {
433
+ clock_t startTime, endTime;
434
+ startTime = clock();
435
+ clock_t startTime_all, endTime_all;
436
+ startTime_all = startTime;
437
+ int frac,frac_prev;
438
+
439
+ // precompute common nodes
440
+ printf("stage 1 - precomputing common nodes\n");
441
+ frac_prev=-1;
442
+ for (int x=0;x<n;x++) {
443
+ frac = 100LL*x/n;
444
+ if (frac!=frac_prev) {
445
+ printf("%d%%\r",frac);
446
+ frac_prev=frac;
447
+ }
448
+ for (int n1=0;n1<deg[x];n1++) {
449
+ int a=adj[x][n1];
450
+ for (int n2=n1+1;n2<deg[x];n2++) {
451
+ int b=adj[x][n2];
452
+ PAIR ab=PAIR(a,b);
453
+ common2[ab]++;
454
+ for (int n3=n2+1;n3<deg[x];n3++) {
455
+ int c=adj[x][n3];
456
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
457
+ if (st<2) continue;
458
+ TRIPLE abc=TRIPLE(a,b,c);
459
+ common3[abc]++;
460
+ }
461
+ }
462
+ }
463
+ }
464
+ // precompute triangles that span over edges
465
+ int *tri = (int*)calloc(m,sizeof(int));
466
+ for (int i=0;i<m;i++) {
467
+ int x=edges[i].a, y=edges[i].b;
468
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
469
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
470
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
471
+ else { yi++; }
472
+ }
473
+ }
474
+ endTime = clock();
475
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
476
+ startTime = endTime;
477
+
478
+ // count full graphlets
479
+ printf("stage 2 - counting full graphlets\n");
480
+ int64 *C5 = (int64*)calloc(n,sizeof(int64));
481
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
482
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
483
+ frac_prev=-1;
484
+ for (int x=0;x<n;x++) {
485
+ frac = 100LL*x/n;
486
+ if (frac!=frac_prev) {
487
+ printf("%d%%\r",frac);
488
+ frac_prev=frac;
489
+ }
490
+ for (int nx=0;nx<deg[x];nx++) {
491
+ int y=adj[x][nx];
492
+ if (y >= x) break;
493
+ nn=0;
494
+ for (int ny=0;ny<deg[y];ny++) {
495
+ int z=adj[y][ny];
496
+ if (z >= y) break;
497
+ if (adjacent(x,z)) {
498
+ neigh[nn++]=z;
499
+ }
500
+ }
501
+ for (int i=0;i<nn;i++) {
502
+ int z = neigh[i];
503
+ nn2=0;
504
+ for (int j=i+1;j<nn;j++) {
505
+ int zz = neigh[j];
506
+ if (adjacent(z,zz)) {
507
+ neigh2[nn2++]=zz;
508
+ }
509
+ }
510
+ for (int i2=0;i2<nn2;i2++) {
511
+ int zz = neigh2[i2];
512
+ for (int j2=i2+1;j2<nn2;j2++) {
513
+ int zzz = neigh2[j2];
514
+ if (adjacent(zz,zzz)) {
515
+ C5[x]++; C5[y]++; C5[z]++; C5[zz]++; C5[zzz]++;
516
+ }
517
+ }
518
+ }
519
+ }
520
+ }
521
+ }
522
+ endTime = clock();
523
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
524
+ startTime = endTime;
525
+
526
+ int *common_x = (int*)calloc(n,sizeof(int));
527
+ int *common_x_list = (int*)malloc(n*sizeof(int)), ncx=0;
528
+ int *common_a = (int*)calloc(n,sizeof(int));
529
+ int *common_a_list = (int*)malloc(n*sizeof(int)), nca=0;
530
+
531
+ // set up a system of equations relating orbit counts
532
+ printf("stage 3 - building systems of equations\n");
533
+ frac_prev=-1;
534
+ for (int x=0;x<n;x++) {
535
+ frac = 100LL*x/n;
536
+ if (frac!=frac_prev) {
537
+ printf("%d%%\r",frac);
538
+ frac_prev=frac;
539
+ }
540
+
541
+ for (int i=0;i<ncx;i++) common_x[common_x_list[i]]=0;
542
+ ncx=0;
543
+
544
+ // smaller graphlets
545
+ orbit[x][0] = deg[x];
546
+ for (int nx1=0;nx1<deg[x];nx1++) {
547
+ int a=adj[x][nx1];
548
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
549
+ int b=adj[x][nx2];
550
+ if (adjacent(a,b)) orbit[x][3]++;
551
+ else orbit[x][2]++;
552
+ }
553
+ for (int na=0;na<deg[a];na++) {
554
+ int b=adj[a][na];
555
+ if (b!=x && !adjacent(x,b)) {
556
+ orbit[x][1]++;
557
+ if (common_x[b]==0) common_x_list[ncx++]=b;
558
+ common_x[b]++;
559
+ }
560
+ }
561
+ }
562
+
563
+ int64 f_71=0, f_70=0, f_67=0, f_66=0, f_58=0, f_57=0; // 14
564
+ int64 f_69=0, f_68=0, f_64=0, f_61=0, f_60=0, f_55=0, f_48=0, f_42=0, f_41=0; // 13
565
+ int64 f_65=0, f_63=0, f_59=0, f_54=0, f_47=0, f_46=0, f_40=0; // 12
566
+ int64 f_62=0, f_53=0, f_51=0, f_50=0, f_49=0, f_38=0, f_37=0, f_36=0; // 8
567
+ int64 f_44=0, f_33=0, f_30=0, f_26=0; // 11
568
+ int64 f_52=0, f_43=0, f_32=0, f_29=0, f_25=0; // 10
569
+ int64 f_56=0, f_45=0, f_39=0, f_31=0, f_28=0, f_24=0; // 9
570
+ int64 f_35=0, f_34=0, f_27=0, f_18=0, f_16=0, f_15=0; // 4
571
+ int64 f_17=0; // 5
572
+ int64 f_22=0, f_20=0, f_19=0; // 6
573
+ int64 f_23=0, f_21=0; // 7
574
+
575
+ for (int nx1=0;nx1<deg[x];nx1++) {
576
+ int a=inc[x][nx1].first, xa=inc[x][nx1].second;
577
+
578
+ for (int i=0;i<nca;i++) common_a[common_a_list[i]]=0;
579
+ nca=0;
580
+ for (int na=0;na<deg[a];na++) {
581
+ int b=adj[a][na];
582
+ for (int nb=0;nb<deg[b];nb++) {
583
+ int c=adj[b][nb];
584
+ if (c==a || adjacent(a,c)) continue;
585
+ if (common_a[c]==0) common_a_list[nca++]=c;
586
+ common_a[c]++;
587
+ }
588
+ }
589
+
590
+ // x = orbit-14 (tetrahedron)
591
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
592
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
593
+ if (!adjacent(a,b)) continue;
594
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
595
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
596
+ if (!adjacent(a,c) || !adjacent(b,c)) continue;
597
+ orbit[x][14]++;
598
+ f_70 += common3_get(TRIPLE(a,b,c))-1;
599
+ f_71 += (tri[xa]>2 && tri[xb]>2)?(common3_get(TRIPLE(x,a,b))-1):0;
600
+ f_71 += (tri[xa]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,a,c))-1):0;
601
+ f_71 += (tri[xb]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,b,c))-1):0;
602
+ f_67 += tri[xa]-2+tri[xb]-2+tri[xc]-2;
603
+ f_66 += common2_get(PAIR(a,b))-2;
604
+ f_66 += common2_get(PAIR(a,c))-2;
605
+ f_66 += common2_get(PAIR(b,c))-2;
606
+ f_58 += deg[x]-3;
607
+ f_57 += deg[a]-3+deg[b]-3+deg[c]-3;
608
+ }
609
+ }
610
+
611
+ // x = orbit-13 (diamond)
612
+ for (int nx2=0;nx2<deg[x];nx2++) {
613
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
614
+ if (!adjacent(a,b)) continue;
615
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
616
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
617
+ if (!adjacent(a,c) || adjacent(b,c)) continue;
618
+ orbit[x][13]++;
619
+ f_69 += (tri[xb]>1 && tri[xc]>1)?(common3_get(TRIPLE(x,b,c))-1):0;
620
+ f_68 += common3_get(TRIPLE(a,b,c))-1;
621
+ f_64 += common2_get(PAIR(b,c))-2;
622
+ f_61 += tri[xb]-1+tri[xc]-1;
623
+ f_60 += common2_get(PAIR(a,b))-1;
624
+ f_60 += common2_get(PAIR(a,c))-1;
625
+ f_55 += tri[xa]-2;
626
+ f_48 += deg[b]-2+deg[c]-2;
627
+ f_42 += deg[x]-3;
628
+ f_41 += deg[a]-3;
629
+ }
630
+ }
631
+
632
+ // x = orbit-12 (diamond)
633
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
634
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
635
+ if (!adjacent(a,b)) continue;
636
+ for (int na=0;na<deg[a];na++) {
637
+ int c=inc[a][na].first, ac=inc[a][na].second;
638
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
639
+ orbit[x][12]++;
640
+ f_65 += (tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
641
+ f_63 += common_x[c]-2;
642
+ f_59 += tri[ac]-1+common2_get(PAIR(b,c))-1;
643
+ f_54 += common2_get(PAIR(a,b))-2;
644
+ f_47 += deg[x]-2;
645
+ f_46 += deg[c]-2;
646
+ f_40 += deg[a]-3+deg[b]-3;
647
+ }
648
+ }
649
+
650
+ // x = orbit-8 (cycle)
651
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
652
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
653
+ if (adjacent(a,b)) continue;
654
+ for (int na=0;na<deg[a];na++) {
655
+ int c=inc[a][na].first, ac=inc[a][na].second;
656
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
657
+ orbit[x][8]++;
658
+ f_62 += (tri[ac]>0)?common3_get(TRIPLE(a,b,c)):0;
659
+ f_53 += tri[xa]+tri[xb];
660
+ f_51 += tri[ac]+common2_get(PAIR(c,b));
661
+ f_50 += common_x[c]-2;
662
+ f_49 += common_a[b]-2;
663
+ f_38 += deg[x]-2;
664
+ f_37 += deg[a]-2+deg[b]-2;
665
+ f_36 += deg[c]-2;
666
+ }
667
+ }
668
+
669
+ // x = orbit-11 (paw)
670
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
671
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
672
+ if (!adjacent(a,b)) continue;
673
+ for (int nx3=0;nx3<deg[x];nx3++) {
674
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
675
+ if (c==a || c==b || adjacent(a,c) || adjacent(b,c)) continue;
676
+ orbit[x][11]++;
677
+ f_44 += tri[xc];
678
+ f_33 += deg[x]-3;
679
+ f_30 += deg[c]-1;
680
+ f_26 += deg[a]-2+deg[b]-2;
681
+ }
682
+ }
683
+
684
+ // x = orbit-10 (paw)
685
+ for (int nx2=0;nx2<deg[x];nx2++) {
686
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
687
+ if (!adjacent(a,b)) continue;
688
+ for (int nb=0;nb<deg[b];nb++) {
689
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
690
+ if (c==x || c==a || adjacent(a,c) || adjacent(x,c)) continue;
691
+ orbit[x][10]++;
692
+ f_52 += common_a[c]-1;
693
+ f_43 += tri[bc];
694
+ f_32 += deg[b]-3;
695
+ f_29 += deg[c]-1;
696
+ f_25 += deg[a]-2;
697
+ }
698
+ }
699
+
700
+ // x = orbit-9 (paw)
701
+ for (int na1=0;na1<deg[a];na1++) {
702
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
703
+ if (b==x || adjacent(x,b)) continue;
704
+ for (int na2=na1+1;na2<deg[a];na2++) {
705
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
706
+ if (c==x || !adjacent(b,c) || adjacent(x,c)) continue;
707
+ orbit[x][9]++;
708
+ f_56 += (tri[ab]>1 && tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
709
+ f_45 += common2_get(PAIR(b,c))-1;
710
+ f_39 += tri[ab]-1+tri[ac]-1;
711
+ f_31 += deg[a]-3;
712
+ f_28 += deg[x]-1;
713
+ f_24 += deg[b]-2+deg[c]-2;
714
+ }
715
+ }
716
+
717
+ // x = orbit-4 (path)
718
+ for (int na=0;na<deg[a];na++) {
719
+ int b=inc[a][na].first, ab=inc[a][na].second;
720
+ if (b==x || adjacent(x,b)) continue;
721
+ for (int nb=0;nb<deg[b];nb++) {
722
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
723
+ if (c==a || adjacent(a,c) || adjacent(x,c)) continue;
724
+ orbit[x][4]++;
725
+ f_35 += common_a[c]-1;
726
+ f_34 += common_x[c];
727
+ f_27 += tri[bc];
728
+ f_18 += deg[b]-2;
729
+ f_16 += deg[x]-1;
730
+ f_15 += deg[c]-1;
731
+ }
732
+ }
733
+
734
+ // x = orbit-5 (path)
735
+ for (int nx2=0;nx2<deg[x];nx2++) {
736
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
737
+ if (b==a || adjacent(a,b)) continue;
738
+ for (int nb=0;nb<deg[b];nb++) {
739
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
740
+ if (c==x || adjacent(a,c) || adjacent(x,c)) continue;
741
+ orbit[x][5]++;
742
+ f_17 += deg[a]-1;
743
+ }
744
+ }
745
+
746
+ // x = orbit-6 (claw)
747
+ for (int na1=0;na1<deg[a];na1++) {
748
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
749
+ if (b==x || adjacent(x,b)) continue;
750
+ for (int na2=na1+1;na2<deg[a];na2++) {
751
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
752
+ if (c==x || adjacent(x,c) || adjacent(b,c)) continue;
753
+ orbit[x][6]++;
754
+ f_22 += deg[a]-3;
755
+ f_20 += deg[x]-1;
756
+ f_19 += deg[b]-1+deg[c]-1;
757
+ }
758
+ }
759
+
760
+ // x = orbit-7 (claw)
761
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
762
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
763
+ if (adjacent(a,b)) continue;
764
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
765
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
766
+ if (adjacent(a,c) || adjacent(b,c)) continue;
767
+ orbit[x][7]++;
768
+ f_23 += deg[x]-3;
769
+ f_21 += deg[a]-1+deg[b]-1+deg[c]-1;
770
+ }
771
+ }
772
+ }
773
+
774
+ // solve equations
775
+ orbit[x][72] = C5[x];
776
+ orbit[x][71] = (f_71-12*orbit[x][72])/2;
777
+ orbit[x][70] = (f_70-4*orbit[x][72]);
778
+ orbit[x][69] = (f_69-2*orbit[x][71])/4;
779
+ orbit[x][68] = (f_68-2*orbit[x][71]);
780
+ orbit[x][67] = (f_67-12*orbit[x][72]-4*orbit[x][71]);
781
+ orbit[x][66] = (f_66-12*orbit[x][72]-2*orbit[x][71]-3*orbit[x][70]);
782
+ orbit[x][65] = (f_65-3*orbit[x][70])/2;
783
+ orbit[x][64] = (f_64-2*orbit[x][71]-4*orbit[x][69]-1*orbit[x][68]);
784
+ orbit[x][63] = (f_63-3*orbit[x][70]-2*orbit[x][68]);
785
+ orbit[x][62] = (f_62-1*orbit[x][68])/2;
786
+ orbit[x][61] = (f_61-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][67])/2;
787
+ orbit[x][60] = (f_60-4*orbit[x][71]-2*orbit[x][68]-2*orbit[x][67]);
788
+ orbit[x][59] = (f_59-6*orbit[x][70]-2*orbit[x][68]-4*orbit[x][65]);
789
+ orbit[x][58] = (f_58-4*orbit[x][72]-2*orbit[x][71]-1*orbit[x][67]);
790
+ orbit[x][57] = (f_57-12*orbit[x][72]-4*orbit[x][71]-3*orbit[x][70]-1*orbit[x][67]-2*orbit[x][66]);
791
+ orbit[x][56] = (f_56-2*orbit[x][65])/3;
792
+ orbit[x][55] = (f_55-2*orbit[x][71]-2*orbit[x][67])/3;
793
+ orbit[x][54] = (f_54-3*orbit[x][70]-1*orbit[x][66]-2*orbit[x][65])/2;
794
+ orbit[x][53] = (f_53-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]);
795
+ orbit[x][52] = (f_52-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59])/2;
796
+ orbit[x][51] = (f_51-2*orbit[x][68]-2*orbit[x][63]-4*orbit[x][62]);
797
+ orbit[x][50] = (f_50-1*orbit[x][68]-2*orbit[x][63])/3;
798
+ orbit[x][49] = (f_49-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][62])/2;
799
+ orbit[x][48] = (f_48-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][68]-2*orbit[x][67]-2*orbit[x][64]-2*orbit[x][61]-1*orbit[x][60]);
800
+ orbit[x][47] = (f_47-3*orbit[x][70]-2*orbit[x][68]-1*orbit[x][66]-1*orbit[x][63]-1*orbit[x][60]);
801
+ orbit[x][46] = (f_46-3*orbit[x][70]-2*orbit[x][68]-2*orbit[x][65]-1*orbit[x][63]-1*orbit[x][59]);
802
+ orbit[x][45] = (f_45-2*orbit[x][65]-2*orbit[x][62]-3*orbit[x][56]);
803
+ orbit[x][44] = (f_44-1*orbit[x][67]-2*orbit[x][61])/4;
804
+ orbit[x][43] = (f_43-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59])/2;
805
+ orbit[x][42] = (f_42-2*orbit[x][71]-4*orbit[x][69]-2*orbit[x][67]-2*orbit[x][61]-3*orbit[x][55]);
806
+ orbit[x][41] = (f_41-2*orbit[x][71]-1*orbit[x][68]-2*orbit[x][67]-1*orbit[x][60]-3*orbit[x][55]);
807
+ orbit[x][40] = (f_40-6*orbit[x][70]-2*orbit[x][68]-2*orbit[x][66]-4*orbit[x][65]-1*orbit[x][60]-1*orbit[x][59]-4*orbit[x][54]);
808
+ orbit[x][39] = (f_39-4*orbit[x][65]-1*orbit[x][59]-6*orbit[x][56])/2;
809
+ orbit[x][38] = (f_38-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][63]-1*orbit[x][53]-3*orbit[x][50]);
810
+ orbit[x][37] = (f_37-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]-4*orbit[x][62]-1*orbit[x][53]-1*orbit[x][51]-4*orbit[x][49]);
811
+ orbit[x][36] = (f_36-1*orbit[x][68]-2*orbit[x][63]-2*orbit[x][62]-1*orbit[x][51]-3*orbit[x][50]);
812
+ orbit[x][35] = (f_35-1*orbit[x][59]-2*orbit[x][52]-2*orbit[x][45])/2;
813
+ orbit[x][34] = (f_34-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51])/2;
814
+ orbit[x][33] = (f_33-1*orbit[x][67]-2*orbit[x][61]-3*orbit[x][58]-4*orbit[x][44]-2*orbit[x][42])/2;
815
+ orbit[x][32] = (f_32-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][43]-2*orbit[x][41]-1*orbit[x][40])/2;
816
+ orbit[x][31] = (f_31-2*orbit[x][65]-1*orbit[x][59]-3*orbit[x][56]-1*orbit[x][43]-2*orbit[x][39]);
817
+ orbit[x][30] = (f_30-1*orbit[x][67]-1*orbit[x][63]-2*orbit[x][61]-1*orbit[x][53]-4*orbit[x][44]);
818
+ orbit[x][29] = (f_29-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][60]-1*orbit[x][59]-1*orbit[x][53]-2*orbit[x][52]-2*orbit[x][43]);
819
+ orbit[x][28] = (f_28-2*orbit[x][65]-2*orbit[x][62]-1*orbit[x][59]-1*orbit[x][51]-1*orbit[x][43]);
820
+ orbit[x][27] = (f_27-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][45])/2;
821
+ orbit[x][26] = (f_26-2*orbit[x][67]-2*orbit[x][63]-2*orbit[x][61]-6*orbit[x][58]-1*orbit[x][53]-2*orbit[x][47]-2*orbit[x][42]);
822
+ orbit[x][25] = (f_25-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][52]-1*orbit[x][48]-1*orbit[x][40])/2;
823
+ orbit[x][24] = (f_24-4*orbit[x][65]-4*orbit[x][62]-1*orbit[x][59]-6*orbit[x][56]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][39]);
824
+ orbit[x][23] = (f_23-1*orbit[x][55]-1*orbit[x][42]-2*orbit[x][33])/4;
825
+ orbit[x][22] = (f_22-2*orbit[x][54]-1*orbit[x][40]-1*orbit[x][39]-1*orbit[x][32]-2*orbit[x][31])/3;
826
+ orbit[x][21] = (f_21-3*orbit[x][55]-3*orbit[x][50]-2*orbit[x][42]-2*orbit[x][38]-2*orbit[x][33]);
827
+ orbit[x][20] = (f_20-2*orbit[x][54]-2*orbit[x][49]-1*orbit[x][40]-1*orbit[x][37]-1*orbit[x][32]);
828
+ orbit[x][19] = (f_19-4*orbit[x][54]-4*orbit[x][49]-1*orbit[x][40]-2*orbit[x][39]-1*orbit[x][37]-2*orbit[x][35]-2*orbit[x][31]);
829
+ orbit[x][18] = (f_18-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][45]-2*orbit[x][36]-2*orbit[x][27]-1*orbit[x][24])/2;
830
+ orbit[x][17] = (f_17-1*orbit[x][60]-1*orbit[x][53]-1*orbit[x][51]-1*orbit[x][48]-1*orbit[x][37]-2*orbit[x][34]-2*orbit[x][30])/2;
831
+ orbit[x][16] = (f_16-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][36]-2*orbit[x][34]-1*orbit[x][29]);
832
+ orbit[x][15] = (f_15-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][35]-2*orbit[x][34]-2*orbit[x][27]);
833
+ }
834
+ endTime = clock();
835
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
836
+
837
+ endTime_all = endTime;
838
+ printf("total: %.2f sec\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
839
+ }
840
+
841
+
842
+ /** count edge orbits of graphlets on max 5 nodes */
843
+ void ecount5() {
844
+ clock_t startTime, endTime;
845
+ startTime = clock();
846
+ clock_t startTime_all, endTime_all;
847
+ startTime_all = startTime;
848
+ int frac,frac_prev;
849
+
850
+ // precompute common nodes
851
+ printf("stage 1 - precomputing common nodes\n");
852
+ frac_prev=-1;
853
+ for (int x=0;x<n;x++) {
854
+ frac = 100LL*x/n;
855
+ if (frac!=frac_prev) {
856
+ printf("%d%%\r",frac);
857
+ frac_prev=frac;
858
+ }
859
+ for (int n1=0;n1<deg[x];n1++) {
860
+ int a=adj[x][n1];
861
+ for (int n2=n1+1;n2<deg[x];n2++) {
862
+ int b=adj[x][n2];
863
+ PAIR ab=PAIR(a,b);
864
+ common2[ab]++;
865
+ for (int n3=n2+1;n3<deg[x];n3++) {
866
+ int c=adj[x][n3];
867
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
868
+ if (st<2) continue;
869
+ TRIPLE abc=TRIPLE(a,b,c);
870
+ common3[abc]++;
871
+ }
872
+ }
873
+ }
874
+ }
875
+ // precompute triangles that span over edges
876
+ int *tri = (int*)calloc(m,sizeof(int));
877
+ for (int i=0;i<m;i++) {
878
+ int x=edges[i].a, y=edges[i].b;
879
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
880
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
881
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
882
+ else { yi++; }
883
+ }
884
+ }
885
+ endTime = clock();
886
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
887
+ startTime = endTime;
888
+
889
+ // count full graphlets
890
+ printf("stage 2 - counting full graphlets\n");
891
+ int64 *C5 = (int64*)calloc(m,sizeof(int64));
892
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
893
+ memset(neighx,-1,n*sizeof(int));
894
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
895
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
896
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
897
+ TIII *neigh2_edges = (TIII*)malloc(n*sizeof(TIII));
898
+ frac_prev=-1;
899
+ for (int x=0;x<n;x++) {
900
+ frac = 100LL*x/n;
901
+ if (frac!=frac_prev) {
902
+ printf("%d%%\r",frac);
903
+ frac_prev=frac;
904
+ }
905
+
906
+ for (int nx=0;nx<deg[x];nx++) {
907
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
908
+ neighx[y]=xy;
909
+ }
910
+ for (int nx=0;nx<deg[x];nx++) {
911
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
912
+ if (y >= x) break;
913
+ nn=0;
914
+ for (int ny=0;ny<deg[y];ny++) {
915
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
916
+ if (z >= y) break;
917
+ if (neighx[z]==-1) continue;
918
+ int xz=neighx[z];
919
+ neigh[nn]=z;
920
+ neigh_edges[nn]={xz, yz};
921
+ nn++;
922
+ }
923
+ for (int i=0;i<nn;i++) {
924
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
925
+ nn2 = 0;
926
+ for (int j=i+1;j<nn;j++) {
927
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
928
+ if (adjacent(z,w)) {
929
+ neigh2[nn2]=w;
930
+ int zw=getEdgeId(z,w);
931
+ neigh2_edges[nn2]={xw,yw,zw};
932
+ nn2++;
933
+ }
934
+ }
935
+ for (int i2=0;i2<nn2;i2++) {
936
+ int z2 = neigh2[i2];
937
+ int z2x=neigh2_edges[i2].first, z2y=neigh2_edges[i2].second, z2z=neigh2_edges[i2].third;
938
+ for (int j2=i2+1;j2<nn2;j2++) {
939
+ int z3 = neigh2[j2];
940
+ int z3x=neigh2_edges[j2].first, z3y=neigh2_edges[j2].second, z3z=neigh2_edges[j2].third;
941
+ if (adjacent(z2,z3)) {
942
+ int zid=getEdgeId(z2,z3);
943
+ C5[xy]++; C5[xz]++; C5[yz]++;
944
+ C5[z2x]++; C5[z2y]++; C5[z2z]++;
945
+ C5[z3x]++; C5[z3y]++; C5[z3z]++;
946
+ C5[zid]++;
947
+ }
948
+ }
949
+ }
950
+ }
951
+ }
952
+ for (int nx=0;nx<deg[x];nx++) {
953
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
954
+ neighx[y]=-1;
955
+ }
956
+ }
957
+ endTime = clock();
958
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
959
+ startTime = endTime;
960
+
961
+ // set up a system of equations relating orbits for every node
962
+ printf("stage 3 - building systems of equations\n");
963
+ int *common_x = (int*)calloc(n,sizeof(int));
964
+ int *common_x_list = (int*)malloc(n*sizeof(int)), nc_x=0;
965
+ int *common_y = (int*)calloc(n,sizeof(int));
966
+ int *common_y_list = (int*)malloc(n*sizeof(int)), nc_y=0;
967
+ frac_prev=-1;
968
+
969
+ for (int x=0;x<n;x++) {
970
+ frac = 100LL*x/n;
971
+ if (frac!=frac_prev) {
972
+ printf("%d%%\r",frac);
973
+ frac_prev=frac;
974
+ }
975
+
976
+ // common nodes of x and some other node
977
+ for (int i=0;i<nc_x;i++) common_x[common_x_list[i]]=0;
978
+ nc_x=0;
979
+ for (int nx=0;nx<deg[x];nx++) {
980
+ int a=adj[x][nx];
981
+ for (int na=0;na<deg[a];na++) {
982
+ int z=adj[a][na];
983
+ if (z==x) continue;
984
+ if (common_x[z]==0) common_x_list[nc_x++]=z;
985
+ common_x[z]++;
986
+ }
987
+ }
988
+
989
+ for (int nx=0;nx<deg[x];nx++) {
990
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
991
+ int e=xy;
992
+ if (y>=x) break;
993
+
994
+ // common nodes of y and some other node
995
+ for (int i=0;i<nc_y;i++) common_y[common_y_list[i]]=0;
996
+ nc_y=0;
997
+ for (int ny=0;ny<deg[y];ny++) {
998
+ int a=adj[y][ny];
999
+ for (int na=0;na<deg[a];na++) {
1000
+ int z=adj[a][na];
1001
+ if (z==y) continue;
1002
+ if (common_y[z]==0) common_y_list[nc_y++]=z;
1003
+ common_y[z]++;
1004
+ }
1005
+ }
1006
+
1007
+ int64 f_66=0, f_65=0, f_62=0, f_61=0, f_60=0, f_51=0, f_50=0; // 11
1008
+ int64 f_64=0, f_58=0, f_55=0, f_48=0, f_41=0, f_35=0; // 10
1009
+ int64 f_63=0, f_59=0, f_57=0, f_54=0, f_53=0, f_52=0, f_47=0, f_40=0, f_39=0, f_34=0, f_33=0; // 9
1010
+ int64 f_45=0, f_36=0, f_26=0, f_23=0, f_19=0; // 7
1011
+ int64 f_49=0, f_38=0, f_37=0, f_32=0, f_25=0, f_22=0, f_18=0; // 6
1012
+ int64 f_56=0, f_46=0, f_44=0, f_43=0, f_42=0, f_31=0, f_30=0; // 5
1013
+ int64 f_27=0, f_17=0, f_15=0; // 4
1014
+ int64 f_20=0, f_16=0, f_13=0; // 3
1015
+ int64 f_29=0, f_28=0, f_24=0, f_21=0, f_14=0, f_12=0; // 2
1016
+
1017
+ // smaller (3-node) graphlets
1018
+ orbit[x][0] = deg[x];
1019
+ for (int nx1=0;nx1<deg[x];nx1++) {
1020
+ int z=adj[x][nx1];
1021
+ if (z==y) continue;
1022
+ if (adjacent(y,z)) eorbit[e][1]++;
1023
+ else eorbit[e][0]++;
1024
+ }
1025
+ for (int ny=0;ny<deg[y];ny++) {
1026
+ int z=adj[y][ny];
1027
+ if (z==x) continue;
1028
+ if (!adjacent(x,z)) eorbit[e][0]++;
1029
+ }
1030
+
1031
+ // edge-orbit 11 = (14,14)
1032
+ for (int nx1=0;nx1<deg[x];nx1++) {
1033
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1034
+ if (a==y || !adjacent(y,a)) continue;
1035
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1036
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1037
+ if (b==y || !adjacent(y,b) || !adjacent(a,b)) continue;
1038
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b), ab=getEdgeId(a,b);
1039
+ eorbit[e][11]++;
1040
+ f_66 += common3_get(TRIPLE(x,y,a))-1;
1041
+ f_66 += common3_get(TRIPLE(x,y,b))-1;
1042
+ f_65 += common3_get(TRIPLE(a,b,x))-1;
1043
+ f_65 += common3_get(TRIPLE(a,b,y))-1;
1044
+ f_62 += tri[xy]-2;
1045
+ f_61 += (tri[xa]-2)+(tri[xb]-2)+(tri[ya]-2)+(tri[yb]-2);
1046
+ f_60 += tri[ab]-2;
1047
+ f_51 += (deg[x]-3)+(deg[y]-3);
1048
+ f_50 += (deg[a]-3)+(deg[b]-3);
1049
+ }
1050
+ }
1051
+
1052
+ // edge-orbit 10 = (13,13)
1053
+ for (int nx1=0;nx1<deg[x];nx1++) {
1054
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1055
+ if (a==y || !adjacent(y,a)) continue;
1056
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1057
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1058
+ if (b==y || !adjacent(y,b) || adjacent(a,b)) continue;
1059
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b);
1060
+ eorbit[e][10]++;
1061
+ f_64 += common3_get(TRIPLE(a,b,x))-1;
1062
+ f_64 += common3_get(TRIPLE(a,b,y))-1;
1063
+ f_58 += common2_get(PAIR(a,b))-2;
1064
+ f_55 += (tri[xa]-1)+(tri[xb]-1)+(tri[ya]-1)+(tri[yb]-1);
1065
+ f_48 += tri[xy]-2;
1066
+ f_41 += (deg[a]-2)+(deg[b]-2);
1067
+ f_35 += (deg[x]-3)+(deg[y]-3);
1068
+ }
1069
+ }
1070
+
1071
+ // edge-orbit 9 = (12,13)
1072
+ for (int nx=0;nx<deg[x];nx++) {
1073
+ int a=adj[x][nx], xa=inc[x][nx].second;
1074
+ if (a==y) continue;
1075
+ for (int ny=0;ny<deg[y];ny++) {
1076
+ int b=adj[y][ny], yb=inc[y][ny].second;
1077
+ if (b==x || !adjacent(a,b)) continue;
1078
+ int adj_ya=adjacent(y,a), adj_xb=adjacent(x,b);
1079
+ if (adj_ya+adj_xb!=1) continue;
1080
+ int ab=getEdgeId(a,b);
1081
+ eorbit[e][9]++;
1082
+ if (adj_xb) {
1083
+ int xb=getEdgeId(x,b);
1084
+ f_63 += common3_get(TRIPLE(a,b,y))-1;
1085
+ f_59 += common3_get(TRIPLE(a,b,x));
1086
+ f_57 += common_y[a]-2;
1087
+ f_54 += tri[yb]-1;
1088
+ f_53 += tri[xa]-1;
1089
+ f_47 += tri[xb]-2;
1090
+ f_40 += deg[y]-2;
1091
+ f_39 += deg[a]-2;
1092
+ f_34 += deg[x]-3;
1093
+ f_33 += deg[b]-3;
1094
+ } else if (adj_ya) {
1095
+ int ya=getEdgeId(y,a);
1096
+ f_63 += common3_get(TRIPLE(a,b,x))-1;
1097
+ f_59 += common3_get(TRIPLE(a,b,y));
1098
+ f_57 += common_x[b]-2;
1099
+ f_54 += tri[xa]-1;
1100
+ f_53 += tri[yb]-1;
1101
+ f_47 += tri[ya]-2;
1102
+ f_40 += deg[x]-2;
1103
+ f_39 += deg[b]-2;
1104
+ f_34 += deg[y]-3;
1105
+ f_33 += deg[a]-3;
1106
+ }
1107
+ f_52 += tri[ab]-1;
1108
+ }
1109
+ }
1110
+
1111
+ // edge-orbit 8 = (10,11)
1112
+ for (int nx=0;nx<deg[x];nx++) {
1113
+ int a=adj[x][nx];
1114
+ if (a==y || !adjacent(y,a)) continue;
1115
+ for (int nx1=0;nx1<deg[x];nx1++) {
1116
+ int b=adj[x][nx1];
1117
+ if (b==y || b==a || adjacent(y,b) || adjacent(a,b)) continue;
1118
+ eorbit[e][8]++;
1119
+ }
1120
+ for (int ny1=0;ny1<deg[y];ny1++) {
1121
+ int b=adj[y][ny1];
1122
+ if (b==x || b==a || adjacent(x,b) || adjacent(a,b)) continue;
1123
+ eorbit[e][8]++;
1124
+ }
1125
+ }
1126
+
1127
+ // edge-orbit 7 = (10,10)
1128
+ for (int nx=0;nx<deg[x];nx++) {
1129
+ int a=adj[x][nx];
1130
+ if (a==y || !adjacent(y,a)) continue;
1131
+ for (int na=0;na<deg[a];na++) {
1132
+ int b=adj[a][na], ab=inc[a][na].second;
1133
+ if (b==x || b==y || adjacent(x,b) || adjacent(y,b)) continue;
1134
+ eorbit[e][7]++;
1135
+ f_45 += common_x[b]-1;
1136
+ f_45 += common_y[b]-1;
1137
+ f_36 += tri[ab];
1138
+ f_26 += deg[a]-3;
1139
+ f_23 += deg[b]-1;
1140
+ f_19 += (deg[x]-2)+(deg[y]-2);
1141
+ }
1142
+ }
1143
+
1144
+ // edge-orbit 6 = (9,11)
1145
+ for (int ny1=0;ny1<deg[y];ny1++) {
1146
+ int a=adj[y][ny1], ya=inc[y][ny1].second;
1147
+ if (a==x || adjacent(x,a)) continue;
1148
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1149
+ int b=adj[y][ny2], yb=inc[y][ny2].second;
1150
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1151
+ int ab=getEdgeId(a,b);
1152
+ eorbit[e][6]++;
1153
+ f_49 += common3_get(TRIPLE(y,a,b));
1154
+ f_38 += tri[ab]-1;
1155
+ f_37 += tri[xy];
1156
+ f_32 += (tri[ya]-1)+(tri[yb]-1);
1157
+ f_25 += deg[y]-3;
1158
+ f_22 += deg[x]-1;
1159
+ f_18 += (deg[a]-2)+(deg[b]-2);
1160
+ }
1161
+ }
1162
+ for (int nx1=0;nx1<deg[x];nx1++) {
1163
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1164
+ if (a==y || adjacent(y,a)) continue;
1165
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1166
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1167
+ if (b==y || adjacent(y,b) || !adjacent(a,b)) continue;
1168
+ int ab=getEdgeId(a,b);
1169
+ eorbit[e][6]++;
1170
+ f_49 += common3_get(TRIPLE(x,a,b));
1171
+ f_38 += tri[ab]-1;
1172
+ f_37 += tri[xy];
1173
+ f_32 += (tri[xa]-1)+(tri[xb]-1);
1174
+ f_25 += deg[x]-3;
1175
+ f_22 += deg[y]-1;
1176
+ f_18 += (deg[a]-2)+(deg[b]-2);
1177
+ }
1178
+ }
1179
+
1180
+ // edge-orbit 5 = (8,8)
1181
+ for (int nx=0;nx<deg[x];nx++) {
1182
+ int a=adj[x][nx], xa=inc[x][nx].second;
1183
+ if (a==y || adjacent(y,a)) continue;
1184
+ for (int ny=0;ny<deg[y];ny++) {
1185
+ int b=adj[y][ny], yb=inc[y][ny].second;
1186
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1187
+ int ab=getEdgeId(a,b);
1188
+ eorbit[e][5]++;
1189
+ f_56 += common3_get(TRIPLE(x,a,b));
1190
+ f_56 += common3_get(TRIPLE(y,a,b));
1191
+ f_46 += tri[xy];
1192
+ f_44 += tri[xa]+tri[yb];
1193
+ f_43 += tri[ab];
1194
+ f_42 += common_x[b]-2;
1195
+ f_42 += common_y[a]-2;
1196
+ f_31 += (deg[x]-2)+(deg[y]-2);
1197
+ f_30 += (deg[a]-2)+(deg[b]-2);
1198
+ }
1199
+ }
1200
+
1201
+ // edge-orbit 4 = (6,7)
1202
+ for (int ny1=0;ny1<deg[y];ny1++) {
1203
+ int a=adj[y][ny1];
1204
+ if (a==x || adjacent(x,a)) continue;
1205
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1206
+ int b=adj[y][ny2];
1207
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
1208
+ eorbit[e][4]++;
1209
+ f_27 += tri[xy];
1210
+ f_17 += deg[y]-3;
1211
+ f_15 += (deg[a]-1)+(deg[b]-1);
1212
+ }
1213
+ }
1214
+ for (int nx1=0;nx1<deg[x];nx1++) {
1215
+ int a=adj[x][nx1];
1216
+ if (a==y || adjacent(y,a)) continue;
1217
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1218
+ int b=adj[x][nx2];
1219
+ if (b==y || adjacent(y,b) || adjacent(a,b)) continue;
1220
+ eorbit[e][4]++;
1221
+ f_27 += tri[xy];
1222
+ f_17 += deg[x]-3;
1223
+ f_15 += (deg[a]-1)+(deg[b]-1);
1224
+ }
1225
+ }
1226
+
1227
+ // edge-orbit 3 = (5,5)
1228
+ for (int nx=0;nx<deg[x];nx++) {
1229
+ int a=adj[x][nx];
1230
+ if (a==y || adjacent(y,a)) continue;
1231
+ for (int ny=0;ny<deg[y];ny++) {
1232
+ int b=adj[y][ny];
1233
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
1234
+ eorbit[e][3]++;
1235
+ f_20 += tri[xy];
1236
+ f_16 += (deg[x]-2)+(deg[y]-2);
1237
+ f_13 += (deg[a]-1)+(deg[b]-1);
1238
+ }
1239
+ }
1240
+
1241
+ // edge-orbit 2 = (4,5)
1242
+ for (int ny=0;ny<deg[y];ny++) {
1243
+ int a=adj[y][ny];
1244
+ if (a==x || adjacent(x,a)) continue;
1245
+ for (int na=0;na<deg[a];na++) {
1246
+ int b=adj[a][na], ab=inc[a][na].second;
1247
+ if (b==y || adjacent(y,b) || adjacent(x,b)) continue;
1248
+ eorbit[e][2]++;
1249
+ f_29 += common_y[b]-1;
1250
+ f_28 += common_x[b];
1251
+ f_24 += tri[xy];
1252
+ f_21 += tri[ab];
1253
+ f_14 += deg[a]-2;
1254
+ f_12 += deg[b]-1;
1255
+ }
1256
+ }
1257
+ for (int nx=0;nx<deg[x];nx++) {
1258
+ int a=adj[x][nx];
1259
+ if (a==y || adjacent(y,a)) continue;
1260
+ for (int na=0;na<deg[a];na++) {
1261
+ int b=adj[a][na], ab=inc[a][na].second;
1262
+ if (b==x || adjacent(x,b) || adjacent(y,b)) continue;
1263
+ eorbit[e][2]++;
1264
+ f_29 += common_x[b]-1;
1265
+ f_28 += common_y[b];
1266
+ f_24 += tri[xy];
1267
+ f_21 += tri[ab];
1268
+ f_14 += deg[a]-2;
1269
+ f_12 += deg[b]-1;
1270
+ }
1271
+ }
1272
+
1273
+ // solve system of equations
1274
+ eorbit[e][67]=C5[e];
1275
+ eorbit[e][66]=(f_66-6*eorbit[e][67])/2;
1276
+ eorbit[e][65]=(f_65-6*eorbit[e][67]);
1277
+ eorbit[e][64]=(f_64-2*eorbit[e][66]);
1278
+ eorbit[e][63]=(f_63-2*eorbit[e][65])/2;
1279
+ eorbit[e][62]=(f_62-2*eorbit[e][66]-3*eorbit[e][67]);
1280
+ eorbit[e][61]=(f_61-2*eorbit[e][65]-4*eorbit[e][66]-12*eorbit[e][67]);
1281
+ eorbit[e][60]=(f_60-1*eorbit[e][65]-3*eorbit[e][67]);
1282
+ eorbit[e][59]=(f_59-2*eorbit[e][65])/2;
1283
+ eorbit[e][58]=(f_58-1*eorbit[e][64]-1*eorbit[e][66]);
1284
+ eorbit[e][57]=(f_57-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1285
+ eorbit[e][56]=(f_56-2*eorbit[e][63])/2;
1286
+ eorbit[e][55]=(f_55-4*eorbit[e][62]-2*eorbit[e][64]-4*eorbit[e][66]);
1287
+ eorbit[e][54]=(f_54-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65])/2;
1288
+ eorbit[e][53]=(f_53-2*eorbit[e][59]-2*eorbit[e][64]-2*eorbit[e][65]);
1289
+ eorbit[e][52]=(f_52-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][65]);
1290
+ eorbit[e][51]=(f_51-1*eorbit[e][61]-2*eorbit[e][62]-1*eorbit[e][65]-4*eorbit[e][66]-6*eorbit[e][67]);
1291
+ eorbit[e][50]=(f_50-2*eorbit[e][60]-1*eorbit[e][61]-2*eorbit[e][65]-2*eorbit[e][66]-6*eorbit[e][67]);
1292
+ eorbit[e][49]=(f_49-1*eorbit[e][59])/3;
1293
+ eorbit[e][48]=(f_48-2*eorbit[e][62]-1*eorbit[e][66])/3;
1294
+ eorbit[e][47]=(f_47-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][65])/2;
1295
+ eorbit[e][46]=(f_46-1*eorbit[e][57]-1*eorbit[e][63]);
1296
+ eorbit[e][45]=(f_45-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
1297
+ eorbit[e][44]=(f_44-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
1298
+ eorbit[e][43]=(f_43-2*eorbit[e][56]-1*eorbit[e][63]);
1299
+ eorbit[e][42]=(f_42-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63])/2;
1300
+ eorbit[e][41]=(f_41-1*eorbit[e][55]-2*eorbit[e][58]-2*eorbit[e][62]-2*eorbit[e][64]-2*eorbit[e][66]);
1301
+ eorbit[e][40]=(f_40-2*eorbit[e][54]-1*eorbit[e][55]-1*eorbit[e][57]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1302
+ eorbit[e][39]=(f_39-1*eorbit[e][52]-1*eorbit[e][53]-1*eorbit[e][57]-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
1303
+ eorbit[e][38]=(f_38-3*eorbit[e][49]-1*eorbit[e][56]-1*eorbit[e][59]);
1304
+ eorbit[e][37]=(f_37-1*eorbit[e][53]-1*eorbit[e][59]);
1305
+ eorbit[e][36]=(f_36-1*eorbit[e][52]-2*eorbit[e][60])/2;
1306
+ eorbit[e][35]=(f_35-6*eorbit[e][48]-1*eorbit[e][55]-4*eorbit[e][62]-1*eorbit[e][64]-2*eorbit[e][66]);
1307
+ eorbit[e][34]=(f_34-2*eorbit[e][47]-1*eorbit[e][53]-1*eorbit[e][55]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][64]-2*eorbit[e][65]);
1308
+ eorbit[e][33]=(f_33-2*eorbit[e][47]-1*eorbit[e][52]-2*eorbit[e][54]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65]);
1309
+ eorbit[e][32]=(f_32-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][59])/2;
1310
+ eorbit[e][31]=(f_31-2*eorbit[e][42]-1*eorbit[e][44]-2*eorbit[e][46]-2*eorbit[e][56]-2*eorbit[e][57]-2*eorbit[e][63]);
1311
+ eorbit[e][30]=(f_30-2*eorbit[e][42]-2*eorbit[e][43]-1*eorbit[e][44]-4*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
1312
+ eorbit[e][29]=(f_29-2*eorbit[e][38]-1*eorbit[e][45]-1*eorbit[e][52])/2;
1313
+ eorbit[e][28]=(f_28-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52])/2;
1314
+ eorbit[e][27]=(f_27-1*eorbit[e][34]-1*eorbit[e][47]);
1315
+ eorbit[e][26]=(f_26-1*eorbit[e][33]-2*eorbit[e][36]-1*eorbit[e][50]-1*eorbit[e][52]-2*eorbit[e][60])/2;
1316
+ eorbit[e][25]=(f_25-2*eorbit[e][32]-1*eorbit[e][37]-3*eorbit[e][49]-1*eorbit[e][53]-1*eorbit[e][59]);
1317
+ eorbit[e][24]=(f_24-1*eorbit[e][39]-1*eorbit[e][45]-1*eorbit[e][52]);
1318
+ eorbit[e][23]=(f_23-2*eorbit[e][36]-1*eorbit[e][45]-1*eorbit[e][52]-2*eorbit[e][58]-2*eorbit[e][60]);
1319
+ eorbit[e][22]=(f_22-1*eorbit[e][37]-1*eorbit[e][44]-1*eorbit[e][53]-1*eorbit[e][56]-1*eorbit[e][59]);
1320
+ eorbit[e][21]=(f_21-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][52])/2;
1321
+ eorbit[e][20]=(f_20-1*eorbit[e][40]-1*eorbit[e][54]);
1322
+ eorbit[e][19]=(f_19-1*eorbit[e][33]-2*eorbit[e][41]-1*eorbit[e][45]-2*eorbit[e][50]-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
1323
+ eorbit[e][18]=(f_18-2*eorbit[e][32]-2*eorbit[e][38]-1*eorbit[e][44]-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][56]-2*eorbit[e][59]);
1324
+ eorbit[e][17]=(f_17-2*eorbit[e][25]-1*eorbit[e][27]-1*eorbit[e][32]-1*eorbit[e][34]-1*eorbit[e][47])/3;
1325
+ eorbit[e][16]=(f_16-2*eorbit[e][20]-2*eorbit[e][22]-1*eorbit[e][31]-2*eorbit[e][40]-1*eorbit[e][44]-2*eorbit[e][54])/2;
1326
+ eorbit[e][15]=(f_15-2*eorbit[e][25]-2*eorbit[e][29]-1*eorbit[e][31]-2*eorbit[e][32]-1*eorbit[e][34]-2*eorbit[e][42]-2*eorbit[e][47]);
1327
+ eorbit[e][14]=(f_14-1*eorbit[e][18]-2*eorbit[e][21]-1*eorbit[e][30]-2*eorbit[e][38]-1*eorbit[e][39]-2*eorbit[e][43]-1*eorbit[e][52])/2;
1328
+ eorbit[e][13]=(f_13-2*eorbit[e][22]-2*eorbit[e][28]-1*eorbit[e][31]-1*eorbit[e][40]-2*eorbit[e][44]-2*eorbit[e][54]);
1329
+ eorbit[e][12]=(f_12-2*eorbit[e][21]-2*eorbit[e][28]-2*eorbit[e][29]-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52]);
1330
+ }
1331
+ }
1332
+
1333
+ endTime = clock();
1334
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
1335
+
1336
+ endTime_all = endTime;
1337
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
1338
+ }
1339
+
1340
+ int writeResults(int g, const char* output_filename) {
+ fstream fout;
+ // open the output file before checking the stream state
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 1;
+ }
+ int no[] = {0,0,1,4,15,73};
+ for (int i=0;i<n;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0)
+ fout << " ";
+ fout << orbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ return 0;
+ }
+
1359
+ string writeResultsString(int g) {
1360
+ std::stringstream ss("", ios_base::app | ios_base::out);
1361
+ int no[] = {0,0,1,4,15,73};
1362
+ for (int i=0;i<n;i++) {
1363
+ for (int j=0;j<no[g];j++) {
1364
+ if (j!=0)
1365
+ ss << " ";
1366
+ ss << orbit[i][j];
1367
+ }
1368
+ ss << endl;
1369
+ }
1370
+ return ss.str();
1371
+ }
1372
+
1373
+ int writeEdgeResults(int g, const char* output_filename) {
+ fstream fout;
+ // open the output file before checking the stream state
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 1;
+ }
+ int no[] = {0,0,0,2,12,68};
+ for (int i=0;i<m;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << eorbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ return 0;
+ }
+
1390
+ string writeEdgeResultsString(int g) {
1391
+ std::stringstream ss("", ios_base::app | ios_base::out);
1392
+ int no[] = {0,0,0,2,12,68};
1393
+ for (int i=0;i<m;i++) {
1394
+ for (int j=0;j<no[g];j++) {
1395
+ if (j!=0) ss << " ";
1396
+ ss << eorbit[i][j];
1397
+ }
1398
+ ss << endl;
1399
+ }
1400
+ return ss.str();
1401
+ }
1402
+
1403
+ int motif_counts(const char* orbit_type, int graphlet_size,
1404
+ const char* input_filename, const char* output_filename, string &out_str) {
1405
+ fstream fin; // input and output files
1406
+ // open input, output files
1407
+ if (strcmp(orbit_type, "node")!=0 && strcmp(orbit_type, "edge")!=0) {
1408
+ cerr << "Incorrect orbit type '" << orbit_type << "'. Should be 'node' or 'edge'." << endl;
1409
+ return 0;
1410
+ }
1411
+ if (graphlet_size!=4 && graphlet_size!=5) {
1412
+ cerr << "Incorrect graphlet size " << graphlet_size << ". Should be 4 or 5." << endl;
1413
+ return 0;
1414
+ }
1415
+ fin.open(input_filename, fstream::in);
1416
+ if (fin.fail()) {
1417
+ cerr << "Failed to open file " << input_filename << endl;
1418
+ return 0;
1419
+ }
1420
+ // read input graph
1421
+ fin >> n >> m;
1422
+ int d_max=0;
1423
+ edges = (PAIR*)malloc(m*sizeof(PAIR));
1424
+ deg = (int*)calloc(n,sizeof(int));
1425
+ for (int i=0;i<m;i++) {
1426
+ int a,b;
1427
+ fin >> a >> b;
1428
+ if (!(0<=a && a<n) || !(0<=b && b<n)) {
1429
+ cerr << "Node ids should be between 0 and n-1." << endl;
1430
+ return 0;
1431
+ }
1432
+ if (a==b) {
1433
+ cerr << "Self loops (edge from x to x) are not allowed." << endl;
1434
+ return 0;
1435
+ }
1436
+ deg[a]++; deg[b]++;
1437
+ edges[i]=PAIR(a,b);
1438
+ }
1439
+ for (int i=0;i<n;i++) d_max=max(d_max,deg[i]);
1440
+ printf("nodes: %d\n",n);
1441
+ printf("edges: %d\n",m);
1442
+ printf("max degree: %d\n",d_max);
1443
+ fin.close();
1444
+ if ((int)(set<PAIR>(edges,edges+m).size())!=m) {
1445
+ cerr << "Input file contains duplicate undirected edges." << endl;
1446
+ return 0;
1447
+ }
1448
+ // set up adjacency matrix if it's smaller than 100MB
1449
+ if ((int64)n*n < 100LL*1024*1024*8) {
1450
+ adjacent = adjacent_matrix;
1451
+ adj_matrix = (int*)calloc((n*n)/adj_chunk+1,sizeof(int));
1452
+ for (int i=0;i<m;i++) {
1453
+ int a=edges[i].a, b=edges[i].b;
1454
+ adj_matrix[(a*n+b)/adj_chunk]|=(1<<((a*n+b)%adj_chunk));
1455
+ adj_matrix[(b*n+a)/adj_chunk]|=(1<<((b*n+a)%adj_chunk));
1456
+ }
1457
+ } else {
1458
+ adjacent = adjacent_list;
1459
+ }
1460
+ // set up adjacency, incidence lists
1461
+ adj = (int**)malloc(n*sizeof(int*));
1462
+ for (int i=0;i<n;i++) adj[i] = (int*)malloc(deg[i]*sizeof(int));
1463
+ inc = (PII**)malloc(n*sizeof(PII*));
1464
+ for (int i=0;i<n;i++) inc[i] = (PII*)malloc(deg[i]*sizeof(PII));
1465
+ int *d = (int*)calloc(n,sizeof(int));
1466
+ for (int i=0;i<m;i++) {
1467
+ int a=edges[i].a, b=edges[i].b;
1468
+ adj[a][d[a]]=b; adj[b][d[b]]=a;
1469
+ inc[a][d[a]]=PII(b,i); inc[b][d[b]]=PII(a,i);
1470
+ d[a]++; d[b]++;
1471
+ }
1472
+ for (int i=0;i<n;i++) {
1473
+ sort(adj[i],adj[i]+deg[i]);
1474
+ sort(inc[i],inc[i]+deg[i]);
1475
+ }
1476
+ // initialize orbit counts
1477
+ orbit = (int64**)malloc(n*sizeof(int64*));
1478
+ for (int i=0;i<n;i++) orbit[i] = (int64*)calloc(73,sizeof(int64));
1479
+ // initialize edge orbit counts
1480
+ eorbit = (int64**)malloc(m*sizeof(int64*));
1481
+ for (int i=0;i<m;i++) eorbit[i] = (int64*)calloc(68,sizeof(int64));
1482
+
1483
+ if (strcmp(orbit_type,"node") == 0) {
1484
+ printf("Counting NODE orbits of graphlets on %d nodes.\n\n",graphlet_size);
1485
+ if (graphlet_size==4) count4();
1486
+ if (graphlet_size==5) count5();
1487
+ if (strcmp(output_filename, "std") == 0) {
1488
+ cout << "orbit counts: \n" << writeResultsString(graphlet_size) << endl;
1489
+ } else {
1490
+ out_str = writeResults(graphlet_size, output_filename);
1491
+ }
1492
+ } else {
1493
+ printf("Counting EDGE orbits of graphlets on %d nodes.\n\n",graphlet_size);
1494
+ if (graphlet_size==4) ecount4();
1495
+ if (graphlet_size==5) ecount5();
1496
+ if (strcmp(output_filename, "std") == 0) {
1497
+ cout << "orbit counts: \n" << writeEdgeResultsString(graphlet_size) << endl;
1498
+ } else {
1499
+ out_str = writeEdgeResults(graphlet_size, output_filename);
1500
+ }
1501
+ }
1502
+
1503
+ return 1;
1504
+ }
1505
+
1506
+ int init(int argc, char *argv[]) {
1507
+ if (argc!=5) {
1508
+ cerr << "Incorrect number of arguments." << endl;
1509
+ cerr << "Usage: orca.exe [orbit type: node|edge] [graphlet size: 4/5] [graph - input file] [graphlets - output file]" << endl;
1510
+ return 0;
1511
+ }
1512
+ int graphlet_size;
1513
+ sscanf(argv[2],"%d", &graphlet_size);
1514
+ string out;
1515
+ motif_counts(argv[1], graphlet_size, argv[3], argv[4], out);
1516
+
1517
+ return 1;
1518
+ }
1519
+
1520
+
1521
+ int main(int argc, char *argv[]) {
1522
+
1523
+
1524
+ if (!init(argc, argv)) {
1525
+ // cerr << "Stopping!" << endl;
1526
+ return 1;
1527
+ }
1528
+
1529
+
1530
+ return 0;
1531
+ }
1532
+
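For context on how these writers are consumed downstream: the binary reads a header line "n m" followed by one undirected edge per line (node ids 0..n-1, as checked in motif_counts), and writeResults/writeEdgeResults emit one whitespace-separated row of orbit counts per node/edge. A minimal, hypothetical Python sketch of driving the compiled binary from the analysis code — assuming it is built to analysis/orca/orca; the helper name and temp-file handling here are illustrative, not part of this upload:

import subprocess
import tempfile
import numpy as np
import networkx as nx

def orca_node_orbits(graph, graphlet_size=4, orca_bin="analysis/orca/orca"):
    # Relabel nodes to 0..n-1, as required by the reader in motif_counts().
    graph = nx.convert_node_labels_to_integers(graph)
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write(f"{graph.number_of_nodes()} {graph.number_of_edges()}\n")
        for u, v in graph.edges():
            f.write(f"{u} {v}\n")
        in_path = f.name
    out_path = in_path + ".out"
    # usage mirrors init(): orca [node|edge] [4|5] [input file] [output file]
    subprocess.run([orca_bin, "node", str(graphlet_size), in_path, out_path], check=True)
    # writeResults() emits one row per node: 15 orbit counts for size 4, 73 for size 5
    return np.loadtxt(out_path, dtype=np.int64, ndmin=2)

The 15/73 columns correspond to the no[] table used by writeResults for graphlet sizes 4 and 5.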
analysis/orca/orca.h ADDED
@@ -0,0 +1,1488 @@
1
+ #include <cstdio>
2
+ #include <cstdlib>
3
+ #include <cstring>
4
+ #include <cassert>
5
+ #include <ctime>
6
+ #include <iostream>
7
+ #include <fstream>
8
+ #include <set>
9
+ #include <unordered_map>
10
+ #include <algorithm>
11
+
12
+ #include <Python.h>
13
+ using namespace std;
14
+
15
+
16
+ typedef long long int64;
17
+ typedef pair<int,int> PII;
18
+ typedef struct { int first, second, third; } TIII;
19
+
20
+ struct PAIR {
21
+ int a, b;
22
+ PAIR(int a0, int b0) { a=min(a0,b0); b=max(a0,b0); }
23
+ };
24
+ bool operator<(const PAIR &x, const PAIR &y) {
25
+ if (x.a==y.a) return x.b<y.b;
26
+ else return x.a<y.a;
27
+ }
28
+ bool operator==(const PAIR &x, const PAIR &y) {
29
+ return x.a==y.a && x.b==y.b;
30
+ }
31
+ struct hash_PAIR {
32
+ size_t operator()(const PAIR &x) const {
33
+ return (x.a<<8) ^ (x.b<<0);
34
+ }
35
+ };
36
+
37
+ struct TRIPLE {
38
+ int a, b, c;
39
+ TRIPLE(int a0, int b0, int c0) {
40
+ a=a0; b=b0; c=c0;
41
+ if (a>b) swap(a,b);
42
+ if (b>c) swap(b,c);
43
+ if (a>b) swap(a,b);
44
+ }
45
+ };
46
+ bool operator<(const TRIPLE &x, const TRIPLE &y) {
47
+ if (x.a==y.a) {
48
+ if (x.b==y.b) return x.c<y.c;
49
+ else return x.b<y.b;
50
+ } else return x.a<y.a;
51
+ }
52
+ bool operator==(const TRIPLE &x, const TRIPLE &y) {
53
+ return x.a==y.a && x.b==y.b && x.c==y.c;
54
+ }
55
+ struct hash_TRIPLE {
56
+ size_t operator()(const TRIPLE &x) const {
57
+ return (x.a<<16) ^ (x.b<<8) ^ (x.c<<0);
58
+ }
59
+ };
60
+
61
+ unordered_map<PAIR, int, hash_PAIR> common2;
62
+ unordered_map<TRIPLE, int, hash_TRIPLE> common3;
63
+ unordered_map<PAIR, int, hash_PAIR>::iterator common2_it;
64
+ unordered_map<TRIPLE, int, hash_TRIPLE>::iterator common3_it;
65
+
66
+ #define common3_get(x) (((common3_it=common3.find(x))!=common3.end())?(common3_it->second):0)
67
+ #define common2_get(x) (((common2_it=common2.find(x))!=common2.end())?(common2_it->second):0)
68
+
69
+ int n,m; // n = number of nodes, m = number of edges
70
+ int *deg; // degrees of individual nodes
71
+ PAIR *edges; // list of edges
72
+
73
+ int **adj; // adj[x] - adjacency list of node x
74
+ PII **inc; // inc[x] - incidence list of node x: (y, edge id)
75
+ bool adjacent_list(int x, int y) { return binary_search(adj[x],adj[x]+deg[x],y); }
76
+ int *adj_matrix; // compressed adjacency matrix
77
+ const int adj_chunk = 8*sizeof(int);
78
+ bool adjacent_matrix(int x, int y) { return adj_matrix[(x*n+y)/adj_chunk]&(1<<((x*n+y)%adj_chunk)); }
79
+ bool (*adjacent)(int,int);
80
+ int getEdgeId(int x, int y) { return inc[x][lower_bound(adj[x],adj[x]+deg[x],y)-adj[x]].second; }
81
+
82
+ int64 **orbit; // orbit[x][o] - how many times does node x participate in orbit o
83
+ int64 **eorbit; // eorbit[x][o] - how many times does edge x participate in edge orbit o
84
+
85
+ /** count graphlets on max 4 nodes */
86
+ void count4() {
87
+ clock_t startTime, endTime;
88
+ startTime = clock();
89
+ clock_t startTime_all, endTime_all;
90
+ startTime_all = startTime;
91
+ int frac,frac_prev;
92
+
93
+ // precompute triangles that span over edges
94
+ printf("stage 1 - precomputing common nodes\n");
95
+ int *tri = (int*)calloc(m,sizeof(int));
96
+ frac_prev=-1;
97
+ for (int i=0;i<m;i++) {
98
+ frac = 100LL*i/m;
99
+ if (frac!=frac_prev) {
100
+ printf("%d%%\r",frac);
101
+ frac_prev=frac;
102
+ }
103
+ int x=edges[i].a, y=edges[i].b;
104
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
105
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
106
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
107
+ else { yi++; }
108
+ }
109
+ }
110
+ endTime = clock();
111
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
112
+ startTime = endTime;
113
+
114
+ // count full graphlets
115
+ printf("stage 2 - counting full graphlets\n");
116
+ int64 *C4 = (int64*)calloc(n,sizeof(int64));
117
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
118
+ frac_prev=-1;
119
+ for (int x=0;x<n;x++) {
120
+ frac = 100LL*x/n;
121
+ if (frac!=frac_prev) {
122
+ printf("%d%%\r",frac);
123
+ frac_prev=frac;
124
+ }
125
+ for (int nx=0;nx<deg[x];nx++) {
126
+ int y=adj[x][nx];
127
+ if (y >= x) break;
128
+ nn=0;
129
+ for (int ny=0;ny<deg[y];ny++) {
130
+ int z=adj[y][ny];
131
+ if (z >= y) break;
132
+ if (adjacent(x,z)==0) continue;
133
+ neigh[nn++]=z;
134
+ }
135
+ for (int i=0;i<nn;i++) {
136
+ int z = neigh[i];
137
+ for (int j=i+1;j<nn;j++) {
138
+ int zz = neigh[j];
139
+ if (adjacent(z,zz)) {
140
+ C4[x]++; C4[y]++; C4[z]++; C4[zz]++;
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+ endTime = clock();
147
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
148
+ startTime = endTime;
149
+
150
+ // set up a system of equations relating orbits for every node
151
+ printf("stage 3 - building systems of equations\n");
152
+ int *common = (int*)calloc(n,sizeof(int));
153
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
154
+ frac_prev=-1;
155
+ for (int x=0;x<n;x++) {
156
+ frac = 100LL*x/n;
157
+ if (frac!=frac_prev) {
158
+ printf("%d%%\r",frac);
159
+ frac_prev=frac;
160
+ }
161
+
162
+ int64 f_12_14=0, f_10_13=0;
163
+ int64 f_13_14=0, f_11_13=0;
164
+ int64 f_7_11=0, f_5_8=0;
165
+ int64 f_6_9=0, f_9_12=0, f_4_8=0, f_8_12=0;
166
+ int64 f_14=C4[x];
167
+
168
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
169
+ nc=0;
170
+
171
+ orbit[x][0]=deg[x];
172
+ // x - middle node
173
+ for (int nx1=0;nx1<deg[x];nx1++) {
174
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
175
+ for (int ny=0;ny<deg[y];ny++) {
176
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
177
+ if (adjacent(x,z)) { // triangle
178
+ if (z<y) {
179
+ f_12_14 += tri[ez]-1;
180
+ f_10_13 += (deg[y]-1-tri[ez])+(deg[z]-1-tri[ez]);
181
+ }
182
+ } else {
183
+ if (common[z]==0) common_list[nc++]=z;
184
+ common[z]++;
185
+ }
186
+ }
187
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
188
+ int z=inc[x][nx2].first, ez=inc[x][nx2].second;
189
+ if (adjacent(y,z)) { // triangle
190
+ orbit[x][3]++;
191
+ f_13_14 += (tri[ey]-1)+(tri[ez]-1);
192
+ f_11_13 += (deg[x]-1-tri[ey])+(deg[x]-1-tri[ez]);
193
+ } else { // path
194
+ orbit[x][2]++;
195
+ f_7_11 += (deg[x]-1-tri[ey]-1)+(deg[x]-1-tri[ez]-1);
196
+ f_5_8 += (deg[y]-1-tri[ey])+(deg[z]-1-tri[ez]);
197
+ }
198
+ }
199
+ }
200
+ // x - side node
201
+ for (int nx1=0;nx1<deg[x];nx1++) {
202
+ int y=inc[x][nx1].first, ey=inc[x][nx1].second;
203
+ for (int ny=0;ny<deg[y];ny++) {
204
+ int z=inc[y][ny].first, ez=inc[y][ny].second;
205
+ if (x==z) continue;
206
+ if (!adjacent(x,z)) { // path
207
+ orbit[x][1]++;
208
+ f_6_9 += (deg[y]-1-tri[ey]-1);
209
+ f_9_12 += tri[ez];
210
+ f_4_8 += (deg[z]-1-tri[ez]);
211
+ f_8_12 += (common[z]-1);
212
+ }
213
+ }
214
+ }
215
+
216
+ // solve system of equations
217
+ orbit[x][14]=(f_14);
218
+ orbit[x][13]=(f_13_14-6*f_14)/2;
219
+ orbit[x][12]=(f_12_14-3*f_14);
220
+ orbit[x][11]=(f_11_13-f_13_14+6*f_14)/2;
221
+ orbit[x][10]=(f_10_13-f_13_14+6*f_14);
222
+ orbit[x][9]=(f_9_12-2*f_12_14+6*f_14)/2;
223
+ orbit[x][8]=(f_8_12-2*f_12_14+6*f_14)/2;
224
+ orbit[x][7]=(f_13_14+f_7_11-f_11_13-6*f_14)/6;
225
+ orbit[x][6]=(2*f_12_14+f_6_9-f_9_12-6*f_14)/2;
226
+ orbit[x][5]=(2*f_12_14+f_5_8-f_8_12-6*f_14);
227
+ orbit[x][4]=(2*f_12_14+f_4_8-f_8_12-6*f_14);
228
+ }
229
+
230
+ endTime = clock();
231
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
232
+
233
+ endTime_all = endTime;
234
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
235
+ }
236
+
237
+
238
+ /** count edge orbits of graphlets on max 4 nodes */
239
+ void ecount4() {
240
+ clock_t startTime, endTime;
241
+ startTime = clock();
242
+ clock_t startTime_all, endTime_all;
243
+ startTime_all = startTime;
244
+ int frac,frac_prev;
245
+
246
+ // precompute triangles that span over edges
247
+ printf("stage 1 - precomputing common nodes\n");
248
+ int *tri = (int*)calloc(m,sizeof(int));
249
+ frac_prev=-1;
250
+ for (int i=0;i<m;i++) {
251
+ frac = 100LL*i/m;
252
+ if (frac!=frac_prev) {
253
+ printf("%d%%\r",frac);
254
+ frac_prev=frac;
255
+ }
256
+ int x=edges[i].a, y=edges[i].b;
257
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
258
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
259
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
260
+ else { yi++; }
261
+ }
262
+ }
263
+ endTime = clock();
264
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
265
+ startTime = endTime;
266
+
267
+ // count full graphlets
268
+ printf("stage 2 - counting full graphlets\n");
269
+ int64 *C4 = (int64*)calloc(m,sizeof(int64));
270
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
271
+ memset(neighx,-1,n*sizeof(int));
272
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
273
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
274
+ frac_prev=-1;
275
+ for (int x=0;x<n;x++) {
276
+ frac = 100LL*x/n;
277
+ if (frac!=frac_prev) {
278
+ printf("%d%%\r",frac);
279
+ frac_prev=frac;
280
+ }
281
+
282
+ for (int nx=0;nx<deg[x];nx++) {
283
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
284
+ neighx[y]=xy;
285
+ }
286
+ for (int nx=0;nx<deg[x];nx++) {
287
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
288
+ if (y >= x) break;
289
+ nn=0;
290
+ for (int ny=0;ny<deg[y];ny++) {
291
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
292
+ if (z >= y) break;
293
+ if (neighx[z]==-1) continue;
294
+ int xz=neighx[z];
295
+ neigh[nn]=z;
296
+ neigh_edges[nn]={xz, yz};
297
+ nn++;
298
+ }
299
+ for (int i=0;i<nn;i++) {
300
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
301
+ for (int j=i+1;j<nn;j++) {
302
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
303
+ if (adjacent(z,w)) {
304
+ C4[xy]++;
305
+ C4[xz]++; C4[yz]++;
306
+ C4[xw]++; C4[yw]++;
307
+ // another iteration to count this last(smallest) edge instead of calling getEdgeId
308
+ //int zw=getEdgeId(z,w); C4[zw]++;
309
+ }
310
+ }
311
+ }
312
+ }
313
+ for (int nx=0;nx<deg[x];nx++) {
314
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
315
+ neighx[y]=-1;
316
+ }
317
+ }
318
+ endTime = clock();
319
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
320
+ startTime = endTime;
321
+
322
+ // count full graphlets for the smallest edge
323
+ for (int x=0;x<n;x++) {
324
+ frac = 100LL*x/n;
325
+ if (frac!=frac_prev) {
326
+ printf("%d%%\r",frac);
327
+ frac_prev=frac;
328
+ }
329
+ for (int nx=deg[x]-1;nx>=0;nx--) {
330
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
331
+ if (y <= x) break;
332
+ nn=0;
333
+ for (int ny=deg[y]-1;ny>=0;ny--) {
334
+ int z=adj[y][ny];
335
+ if (z <= y) break;
336
+ if (adjacent(x,z)==0) continue;
337
+ neigh[nn++]=z;
338
+ }
339
+ for (int i=0;i<nn;i++) {
340
+ int z = neigh[i];
341
+ for (int j=i+1;j<nn;j++) {
342
+ int zz = neigh[j];
343
+ if (adjacent(z,zz)) {
344
+ C4[xy]++;
345
+ }
346
+ }
347
+ }
348
+ }
349
+ }
350
+ endTime = clock();
351
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
352
+ startTime = endTime;
353
+
354
+ // set up a system of equations relating orbits for every node
355
+ printf("stage 3 - building systems of equations\n");
356
+ int *common = (int*)calloc(n,sizeof(int));
357
+ int *common_list = (int*)malloc(n*sizeof(int)), nc=0;
358
+ frac_prev=-1;
359
+
360
+ for (int x=0;x<n;x++) {
361
+ frac = 100LL*x/n;
362
+ if (frac!=frac_prev) {
363
+ printf("%d%%\r",frac);
364
+ frac_prev=frac;
365
+ }
366
+
367
+ // common nodes of x and some other node
368
+ for (int i=0;i<nc;i++) common[common_list[i]]=0;
369
+ nc=0;
370
+ for (int nx=0;nx<deg[x];nx++) {
371
+ int y=adj[x][nx];
372
+ for (int ny=0;ny<deg[y];ny++) {
373
+ int z=adj[y][ny];
374
+ if (z==x) continue;
375
+ if (common[z]==0) common_list[nc++]=z;
376
+ common[z]++;
377
+ }
378
+ }
379
+
380
+ for (int nx=0;nx<deg[x];nx++) {
381
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
382
+ int e=xy;
383
+ for (int n1=0;n1<deg[x];n1++) {
384
+ int z=inc[x][n1].first, xz=inc[x][n1].second;
385
+ if (z==y) continue;
386
+ if (adjacent(y,z)) { // triangle
387
+ if (x<y) {
388
+ eorbit[e][1]++;
389
+ eorbit[e][10] += tri[xy]-1;
390
+ eorbit[e][7] += deg[z]-2;
391
+ }
392
+ eorbit[e][9] += tri[xz]-1;
393
+ eorbit[e][8] += deg[x]-2;
394
+ }
395
+ }
396
+ for (int n1=0;n1<deg[y];n1++) {
397
+ int z=inc[y][n1].first, yz=inc[y][n1].second;
398
+ if (z==x) continue;
399
+ if (!adjacent(x,z)) { // path x-y-z
400
+ eorbit[e][0]++;
401
+ eorbit[e][6] += tri[yz];
402
+ eorbit[e][5] += common[z]-1;
403
+ eorbit[e][4] += deg[y]-2;
404
+ eorbit[e][3] += deg[x]-1;
405
+ eorbit[e][2] += deg[z]-1;
406
+ }
407
+ }
408
+ }
409
+ }
410
+ // solve system of equations
411
+ for (int e=0;e<m;e++) {
412
+ eorbit[e][11]=C4[e];
413
+ eorbit[e][10]=(eorbit[e][10]-2*eorbit[e][11])/2;
414
+ eorbit[e][9]=(eorbit[e][9]-4*eorbit[e][11]);
415
+ eorbit[e][8]=(eorbit[e][8]-eorbit[e][9]-4*eorbit[e][10]-4*eorbit[e][11]);
416
+ eorbit[e][7]=(eorbit[e][7]-eorbit[e][9]-2*eorbit[e][11]);
417
+ eorbit[e][6]=(eorbit[e][6]-eorbit[e][9])/2;
418
+ eorbit[e][5]=(eorbit[e][5]-eorbit[e][9])/2;
419
+ eorbit[e][4]=(eorbit[e][4]-2*eorbit[e][6]-eorbit[e][8]-eorbit[e][9])/2;
420
+ eorbit[e][3]=(eorbit[e][3]-2*eorbit[e][5]-eorbit[e][8]-eorbit[e][9])/2;
421
+ eorbit[e][2]=(eorbit[e][2]-2*eorbit[e][5]-2*eorbit[e][6]-eorbit[e][9]);
422
+ }
423
+
424
+ endTime = clock();
425
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
426
+
427
+ endTime_all = endTime;
428
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
429
+ }
430
+
431
+
432
+ /** count graphlets on max 5 nodes */
433
+ void count5() {
434
+ clock_t startTime, endTime;
435
+ startTime = clock();
436
+ clock_t startTime_all, endTime_all;
437
+ startTime_all = startTime;
438
+ int frac,frac_prev;
439
+
440
+ // precompute common nodes
441
+ printf("stage 1 - precomputing common nodes\n");
442
+ frac_prev=-1;
443
+ for (int x=0;x<n;x++) {
444
+ frac = 100LL*x/n;
445
+ if (frac!=frac_prev) {
446
+ printf("%d%%\r",frac);
447
+ frac_prev=frac;
448
+ }
449
+ for (int n1=0;n1<deg[x];n1++) {
450
+ int a=adj[x][n1];
451
+ for (int n2=n1+1;n2<deg[x];n2++) {
452
+ int b=adj[x][n2];
453
+ PAIR ab=PAIR(a,b);
454
+ common2[ab]++;
455
+ for (int n3=n2+1;n3<deg[x];n3++) {
456
+ int c=adj[x][n3];
457
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
458
+ if (st<2) continue;
459
+ TRIPLE abc=TRIPLE(a,b,c);
460
+ common3[abc]++;
461
+ }
462
+ }
463
+ }
464
+ }
465
+ // precompute triangles that span over edges
466
+ int *tri = (int*)calloc(m,sizeof(int));
467
+ for (int i=0;i<m;i++) {
468
+ int x=edges[i].a, y=edges[i].b;
469
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
470
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
471
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
472
+ else { yi++; }
473
+ }
474
+ }
475
+ endTime = clock();
476
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
477
+ startTime = endTime;
478
+
479
+ // count full graphlets
480
+ printf("stage 2 - counting full graphlets\n");
481
+ int64 *C5 = (int64*)calloc(n,sizeof(int64));
482
+ int *neigh = (int*)malloc(n*sizeof(int)), nn;
483
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
484
+ frac_prev=-1;
485
+ for (int x=0;x<n;x++) {
486
+ frac = 100LL*x/n;
487
+ if (frac!=frac_prev) {
488
+ printf("%d%%\r",frac);
489
+ frac_prev=frac;
490
+ }
491
+ for (int nx=0;nx<deg[x];nx++) {
492
+ int y=adj[x][nx];
493
+ if (y >= x) break;
494
+ nn=0;
495
+ for (int ny=0;ny<deg[y];ny++) {
496
+ int z=adj[y][ny];
497
+ if (z >= y) break;
498
+ if (adjacent(x,z)) {
499
+ neigh[nn++]=z;
500
+ }
501
+ }
502
+ for (int i=0;i<nn;i++) {
503
+ int z = neigh[i];
504
+ nn2=0;
505
+ for (int j=i+1;j<nn;j++) {
506
+ int zz = neigh[j];
507
+ if (adjacent(z,zz)) {
508
+ neigh2[nn2++]=zz;
509
+ }
510
+ }
511
+ for (int i2=0;i2<nn2;i2++) {
512
+ int zz = neigh2[i2];
513
+ for (int j2=i2+1;j2<nn2;j2++) {
514
+ int zzz = neigh2[j2];
515
+ if (adjacent(zz,zzz)) {
516
+ C5[x]++; C5[y]++; C5[z]++; C5[zz]++; C5[zzz]++;
517
+ }
518
+ }
519
+ }
520
+ }
521
+ }
522
+ }
523
+ endTime = clock();
524
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
525
+ startTime = endTime;
526
+
527
+ int *common_x = (int*)calloc(n,sizeof(int));
528
+ int *common_x_list = (int*)malloc(n*sizeof(int)), ncx=0;
529
+ int *common_a = (int*)calloc(n,sizeof(int));
530
+ int *common_a_list = (int*)malloc(n*sizeof(int)), nca=0;
531
+
532
+ // set up a system of equations relating orbit counts
533
+ printf("stage 3 - building systems of equations\n");
534
+ frac_prev=-1;
535
+ for (int x=0;x<n;x++) {
536
+ frac = 100LL*x/n;
537
+ if (frac!=frac_prev) {
538
+ printf("%d%%\r",frac);
539
+ frac_prev=frac;
540
+ }
541
+
542
+ for (int i=0;i<ncx;i++) common_x[common_x_list[i]]=0;
543
+ ncx=0;
544
+
545
+ // smaller graphlets
546
+ orbit[x][0] = deg[x];
547
+ for (int nx1=0;nx1<deg[x];nx1++) {
548
+ int a=adj[x][nx1];
549
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
550
+ int b=adj[x][nx2];
551
+ if (adjacent(a,b)) orbit[x][3]++;
552
+ else orbit[x][2]++;
553
+ }
554
+ for (int na=0;na<deg[a];na++) {
555
+ int b=adj[a][na];
556
+ if (b!=x && !adjacent(x,b)) {
557
+ orbit[x][1]++;
558
+ if (common_x[b]==0) common_x_list[ncx++]=b;
559
+ common_x[b]++;
560
+ }
561
+ }
562
+ }
563
+
564
+ int64 f_71=0, f_70=0, f_67=0, f_66=0, f_58=0, f_57=0; // 14
565
+ int64 f_69=0, f_68=0, f_64=0, f_61=0, f_60=0, f_55=0, f_48=0, f_42=0, f_41=0; // 13
566
+ int64 f_65=0, f_63=0, f_59=0, f_54=0, f_47=0, f_46=0, f_40=0; // 12
567
+ int64 f_62=0, f_53=0, f_51=0, f_50=0, f_49=0, f_38=0, f_37=0, f_36=0; // 8
568
+ int64 f_44=0, f_33=0, f_30=0, f_26=0; // 11
569
+ int64 f_52=0, f_43=0, f_32=0, f_29=0, f_25=0; // 10
570
+ int64 f_56=0, f_45=0, f_39=0, f_31=0, f_28=0, f_24=0; // 9
571
+ int64 f_35=0, f_34=0, f_27=0, f_18=0, f_16=0, f_15=0; // 4
572
+ int64 f_17=0; // 5
573
+ int64 f_22=0, f_20=0, f_19=0; // 6
574
+ int64 f_23=0, f_21=0; // 7
575
+
576
+ for (int nx1=0;nx1<deg[x];nx1++) {
577
+ int a=inc[x][nx1].first, xa=inc[x][nx1].second;
578
+
579
+ for (int i=0;i<nca;i++) common_a[common_a_list[i]]=0;
580
+ nca=0;
581
+ for (int na=0;na<deg[a];na++) {
582
+ int b=adj[a][na];
583
+ for (int nb=0;nb<deg[b];nb++) {
584
+ int c=adj[b][nb];
585
+ if (c==a || adjacent(a,c)) continue;
586
+ if (common_a[c]==0) common_a_list[nca++]=c;
587
+ common_a[c]++;
588
+ }
589
+ }
590
+
591
+ // x = orbit-14 (tetrahedron)
592
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
593
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
594
+ if (!adjacent(a,b)) continue;
595
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
596
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
597
+ if (!adjacent(a,c) || !adjacent(b,c)) continue;
598
+ orbit[x][14]++;
599
+ f_70 += common3_get(TRIPLE(a,b,c))-1;
600
+ f_71 += (tri[xa]>2 && tri[xb]>2)?(common3_get(TRIPLE(x,a,b))-1):0;
601
+ f_71 += (tri[xa]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,a,c))-1):0;
602
+ f_71 += (tri[xb]>2 && tri[xc]>2)?(common3_get(TRIPLE(x,b,c))-1):0;
603
+ f_67 += tri[xa]-2+tri[xb]-2+tri[xc]-2;
604
+ f_66 += common2_get(PAIR(a,b))-2;
605
+ f_66 += common2_get(PAIR(a,c))-2;
606
+ f_66 += common2_get(PAIR(b,c))-2;
607
+ f_58 += deg[x]-3;
608
+ f_57 += deg[a]-3+deg[b]-3+deg[c]-3;
609
+ }
610
+ }
611
+
612
+ // x = orbit-13 (diamond)
613
+ for (int nx2=0;nx2<deg[x];nx2++) {
614
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
615
+ if (!adjacent(a,b)) continue;
616
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
617
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
618
+ if (!adjacent(a,c) || adjacent(b,c)) continue;
619
+ orbit[x][13]++;
620
+ f_69 += (tri[xb]>1 && tri[xc]>1)?(common3_get(TRIPLE(x,b,c))-1):0;
621
+ f_68 += common3_get(TRIPLE(a,b,c))-1;
622
+ f_64 += common2_get(PAIR(b,c))-2;
623
+ f_61 += tri[xb]-1+tri[xc]-1;
624
+ f_60 += common2_get(PAIR(a,b))-1;
625
+ f_60 += common2_get(PAIR(a,c))-1;
626
+ f_55 += tri[xa]-2;
627
+ f_48 += deg[b]-2+deg[c]-2;
628
+ f_42 += deg[x]-3;
629
+ f_41 += deg[a]-3;
630
+ }
631
+ }
632
+
633
+ // x = orbit-12 (diamond)
634
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
635
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
636
+ if (!adjacent(a,b)) continue;
637
+ for (int na=0;na<deg[a];na++) {
638
+ int c=inc[a][na].first, ac=inc[a][na].second;
639
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
640
+ orbit[x][12]++;
641
+ f_65 += (tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
642
+ f_63 += common_x[c]-2;
643
+ f_59 += tri[ac]-1+common2_get(PAIR(b,c))-1;
644
+ f_54 += common2_get(PAIR(a,b))-2;
645
+ f_47 += deg[x]-2;
646
+ f_46 += deg[c]-2;
647
+ f_40 += deg[a]-3+deg[b]-3;
648
+ }
649
+ }
650
+
651
+ // x = orbit-8 (cycle)
652
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
653
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
654
+ if (adjacent(a,b)) continue;
655
+ for (int na=0;na<deg[a];na++) {
656
+ int c=inc[a][na].first, ac=inc[a][na].second;
657
+ if (c==x || adjacent(x,c) || !adjacent(b,c)) continue;
658
+ orbit[x][8]++;
659
+ f_62 += (tri[ac]>0)?common3_get(TRIPLE(a,b,c)):0;
660
+ f_53 += tri[xa]+tri[xb];
661
+ f_51 += tri[ac]+common2_get(PAIR(c,b));
662
+ f_50 += common_x[c]-2;
663
+ f_49 += common_a[b]-2;
664
+ f_38 += deg[x]-2;
665
+ f_37 += deg[a]-2+deg[b]-2;
666
+ f_36 += deg[c]-2;
667
+ }
668
+ }
669
+
670
+ // x = orbit-11 (paw)
671
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
672
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
673
+ if (!adjacent(a,b)) continue;
674
+ for (int nx3=0;nx3<deg[x];nx3++) {
675
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
676
+ if (c==a || c==b || adjacent(a,c) || adjacent(b,c)) continue;
677
+ orbit[x][11]++;
678
+ f_44 += tri[xc];
679
+ f_33 += deg[x]-3;
680
+ f_30 += deg[c]-1;
681
+ f_26 += deg[a]-2+deg[b]-2;
682
+ }
683
+ }
684
+
685
+ // x = orbit-10 (paw)
686
+ for (int nx2=0;nx2<deg[x];nx2++) {
687
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
688
+ if (!adjacent(a,b)) continue;
689
+ for (int nb=0;nb<deg[b];nb++) {
690
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
691
+ if (c==x || c==a || adjacent(a,c) || adjacent(x,c)) continue;
692
+ orbit[x][10]++;
693
+ f_52 += common_a[c]-1;
694
+ f_43 += tri[bc];
695
+ f_32 += deg[b]-3;
696
+ f_29 += deg[c]-1;
697
+ f_25 += deg[a]-2;
698
+ }
699
+ }
700
+
701
+ // x = orbit-9 (paw)
702
+ for (int na1=0;na1<deg[a];na1++) {
703
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
704
+ if (b==x || adjacent(x,b)) continue;
705
+ for (int na2=na1+1;na2<deg[a];na2++) {
706
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
707
+ if (c==x || !adjacent(b,c) || adjacent(x,c)) continue;
708
+ orbit[x][9]++;
709
+ f_56 += (tri[ab]>1 && tri[ac]>1)?common3_get(TRIPLE(a,b,c)):0;
710
+ f_45 += common2_get(PAIR(b,c))-1;
711
+ f_39 += tri[ab]-1+tri[ac]-1;
712
+ f_31 += deg[a]-3;
713
+ f_28 += deg[x]-1;
714
+ f_24 += deg[b]-2+deg[c]-2;
715
+ }
716
+ }
717
+
718
+ // x = orbit-4 (path)
719
+ for (int na=0;na<deg[a];na++) {
720
+ int b=inc[a][na].first, ab=inc[a][na].second;
721
+ if (b==x || adjacent(x,b)) continue;
722
+ for (int nb=0;nb<deg[b];nb++) {
723
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
724
+ if (c==a || adjacent(a,c) || adjacent(x,c)) continue;
725
+ orbit[x][4]++;
726
+ f_35 += common_a[c]-1;
727
+ f_34 += common_x[c];
728
+ f_27 += tri[bc];
729
+ f_18 += deg[b]-2;
730
+ f_16 += deg[x]-1;
731
+ f_15 += deg[c]-1;
732
+ }
733
+ }
734
+
735
+ // x = orbit-5 (path)
736
+ for (int nx2=0;nx2<deg[x];nx2++) {
737
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
738
+ if (b==a || adjacent(a,b)) continue;
739
+ for (int nb=0;nb<deg[b];nb++) {
740
+ int c=inc[b][nb].first, bc=inc[b][nb].second;
741
+ if (c==x || adjacent(a,c) || adjacent(x,c)) continue;
742
+ orbit[x][5]++;
743
+ f_17 += deg[a]-1;
744
+ }
745
+ }
746
+
747
+ // x = orbit-6 (claw)
748
+ for (int na1=0;na1<deg[a];na1++) {
749
+ int b=inc[a][na1].first, ab=inc[a][na1].second;
750
+ if (b==x || adjacent(x,b)) continue;
751
+ for (int na2=na1+1;na2<deg[a];na2++) {
752
+ int c=inc[a][na2].first, ac=inc[a][na2].second;
753
+ if (c==x || adjacent(x,c) || adjacent(b,c)) continue;
754
+ orbit[x][6]++;
755
+ f_22 += deg[a]-3;
756
+ f_20 += deg[x]-1;
757
+ f_19 += deg[b]-1+deg[c]-1;
758
+ }
759
+ }
760
+
761
+ // x = orbit-7 (claw)
762
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
763
+ int b=inc[x][nx2].first, xb=inc[x][nx2].second;
764
+ if (adjacent(a,b)) continue;
765
+ for (int nx3=nx2+1;nx3<deg[x];nx3++) {
766
+ int c=inc[x][nx3].first, xc=inc[x][nx3].second;
767
+ if (adjacent(a,c) || adjacent(b,c)) continue;
768
+ orbit[x][7]++;
769
+ f_23 += deg[x]-3;
770
+ f_21 += deg[a]-1+deg[b]-1+deg[c]-1;
771
+ }
772
+ }
773
+ }
774
+
775
+ // solve equations
776
+ orbit[x][72] = C5[x];
777
+ orbit[x][71] = (f_71-12*orbit[x][72])/2;
778
+ orbit[x][70] = (f_70-4*orbit[x][72]);
779
+ orbit[x][69] = (f_69-2*orbit[x][71])/4;
780
+ orbit[x][68] = (f_68-2*orbit[x][71]);
781
+ orbit[x][67] = (f_67-12*orbit[x][72]-4*orbit[x][71]);
782
+ orbit[x][66] = (f_66-12*orbit[x][72]-2*orbit[x][71]-3*orbit[x][70]);
783
+ orbit[x][65] = (f_65-3*orbit[x][70])/2;
784
+ orbit[x][64] = (f_64-2*orbit[x][71]-4*orbit[x][69]-1*orbit[x][68]);
785
+ orbit[x][63] = (f_63-3*orbit[x][70]-2*orbit[x][68]);
786
+ orbit[x][62] = (f_62-1*orbit[x][68])/2;
787
+ orbit[x][61] = (f_61-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][67])/2;
788
+ orbit[x][60] = (f_60-4*orbit[x][71]-2*orbit[x][68]-2*orbit[x][67]);
789
+ orbit[x][59] = (f_59-6*orbit[x][70]-2*orbit[x][68]-4*orbit[x][65]);
790
+ orbit[x][58] = (f_58-4*orbit[x][72]-2*orbit[x][71]-1*orbit[x][67]);
791
+ orbit[x][57] = (f_57-12*orbit[x][72]-4*orbit[x][71]-3*orbit[x][70]-1*orbit[x][67]-2*orbit[x][66]);
792
+ orbit[x][56] = (f_56-2*orbit[x][65])/3;
793
+ orbit[x][55] = (f_55-2*orbit[x][71]-2*orbit[x][67])/3;
794
+ orbit[x][54] = (f_54-3*orbit[x][70]-1*orbit[x][66]-2*orbit[x][65])/2;
795
+ orbit[x][53] = (f_53-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]);
796
+ orbit[x][52] = (f_52-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59])/2;
797
+ orbit[x][51] = (f_51-2*orbit[x][68]-2*orbit[x][63]-4*orbit[x][62]);
798
+ orbit[x][50] = (f_50-1*orbit[x][68]-2*orbit[x][63])/3;
799
+ orbit[x][49] = (f_49-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][62])/2;
800
+ orbit[x][48] = (f_48-4*orbit[x][71]-8*orbit[x][69]-2*orbit[x][68]-2*orbit[x][67]-2*orbit[x][64]-2*orbit[x][61]-1*orbit[x][60]);
801
+ orbit[x][47] = (f_47-3*orbit[x][70]-2*orbit[x][68]-1*orbit[x][66]-1*orbit[x][63]-1*orbit[x][60]);
802
+ orbit[x][46] = (f_46-3*orbit[x][70]-2*orbit[x][68]-2*orbit[x][65]-1*orbit[x][63]-1*orbit[x][59]);
803
+ orbit[x][45] = (f_45-2*orbit[x][65]-2*orbit[x][62]-3*orbit[x][56]);
804
+ orbit[x][44] = (f_44-1*orbit[x][67]-2*orbit[x][61])/4;
805
+ orbit[x][43] = (f_43-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59])/2;
806
+ orbit[x][42] = (f_42-2*orbit[x][71]-4*orbit[x][69]-2*orbit[x][67]-2*orbit[x][61]-3*orbit[x][55]);
807
+ orbit[x][41] = (f_41-2*orbit[x][71]-1*orbit[x][68]-2*orbit[x][67]-1*orbit[x][60]-3*orbit[x][55]);
808
+ orbit[x][40] = (f_40-6*orbit[x][70]-2*orbit[x][68]-2*orbit[x][66]-4*orbit[x][65]-1*orbit[x][60]-1*orbit[x][59]-4*orbit[x][54]);
809
+ orbit[x][39] = (f_39-4*orbit[x][65]-1*orbit[x][59]-6*orbit[x][56])/2;
810
+ orbit[x][38] = (f_38-1*orbit[x][68]-1*orbit[x][64]-2*orbit[x][63]-1*orbit[x][53]-3*orbit[x][50]);
811
+ orbit[x][37] = (f_37-2*orbit[x][68]-2*orbit[x][64]-2*orbit[x][63]-4*orbit[x][62]-1*orbit[x][53]-1*orbit[x][51]-4*orbit[x][49]);
812
+ orbit[x][36] = (f_36-1*orbit[x][68]-2*orbit[x][63]-2*orbit[x][62]-1*orbit[x][51]-3*orbit[x][50]);
813
+ orbit[x][35] = (f_35-1*orbit[x][59]-2*orbit[x][52]-2*orbit[x][45])/2;
814
+ orbit[x][34] = (f_34-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51])/2;
815
+ orbit[x][33] = (f_33-1*orbit[x][67]-2*orbit[x][61]-3*orbit[x][58]-4*orbit[x][44]-2*orbit[x][42])/2;
816
+ orbit[x][32] = (f_32-2*orbit[x][66]-1*orbit[x][60]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][43]-2*orbit[x][41]-1*orbit[x][40])/2;
817
+ orbit[x][31] = (f_31-2*orbit[x][65]-1*orbit[x][59]-3*orbit[x][56]-1*orbit[x][43]-2*orbit[x][39]);
818
+ orbit[x][30] = (f_30-1*orbit[x][67]-1*orbit[x][63]-2*orbit[x][61]-1*orbit[x][53]-4*orbit[x][44]);
819
+ orbit[x][29] = (f_29-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][60]-1*orbit[x][59]-1*orbit[x][53]-2*orbit[x][52]-2*orbit[x][43]);
820
+ orbit[x][28] = (f_28-2*orbit[x][65]-2*orbit[x][62]-1*orbit[x][59]-1*orbit[x][51]-1*orbit[x][43]);
821
+ orbit[x][27] = (f_27-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][45])/2;
822
+ orbit[x][26] = (f_26-2*orbit[x][67]-2*orbit[x][63]-2*orbit[x][61]-6*orbit[x][58]-1*orbit[x][53]-2*orbit[x][47]-2*orbit[x][42]);
823
+ orbit[x][25] = (f_25-2*orbit[x][66]-2*orbit[x][64]-1*orbit[x][59]-2*orbit[x][57]-2*orbit[x][52]-1*orbit[x][48]-1*orbit[x][40])/2;
824
+ orbit[x][24] = (f_24-4*orbit[x][65]-4*orbit[x][62]-1*orbit[x][59]-6*orbit[x][56]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][39]);
825
+ orbit[x][23] = (f_23-1*orbit[x][55]-1*orbit[x][42]-2*orbit[x][33])/4;
826
+ orbit[x][22] = (f_22-2*orbit[x][54]-1*orbit[x][40]-1*orbit[x][39]-1*orbit[x][32]-2*orbit[x][31])/3;
827
+ orbit[x][21] = (f_21-3*orbit[x][55]-3*orbit[x][50]-2*orbit[x][42]-2*orbit[x][38]-2*orbit[x][33]);
828
+ orbit[x][20] = (f_20-2*orbit[x][54]-2*orbit[x][49]-1*orbit[x][40]-1*orbit[x][37]-1*orbit[x][32]);
829
+ orbit[x][19] = (f_19-4*orbit[x][54]-4*orbit[x][49]-1*orbit[x][40]-2*orbit[x][39]-1*orbit[x][37]-2*orbit[x][35]-2*orbit[x][31]);
830
+ orbit[x][18] = (f_18-1*orbit[x][59]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][45]-2*orbit[x][36]-2*orbit[x][27]-1*orbit[x][24])/2;
831
+ orbit[x][17] = (f_17-1*orbit[x][60]-1*orbit[x][53]-1*orbit[x][51]-1*orbit[x][48]-1*orbit[x][37]-2*orbit[x][34]-2*orbit[x][30])/2;
832
+ orbit[x][16] = (f_16-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][46]-2*orbit[x][36]-2*orbit[x][34]-1*orbit[x][29]);
833
+ orbit[x][15] = (f_15-1*orbit[x][59]-2*orbit[x][52]-1*orbit[x][51]-2*orbit[x][45]-2*orbit[x][35]-2*orbit[x][34]-2*orbit[x][27]);
834
+ }
835
+ endTime = clock();
836
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
837
+
838
+ endTime_all = endTime;
839
+ printf("total: %.2f sec\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
840
+ }
841
+
842
+
843
+ /** count edge orbits of graphlets on max 5 nodes */
844
+ void ecount5() {
845
+ clock_t startTime, endTime;
846
+ startTime = clock();
847
+ clock_t startTime_all, endTime_all;
848
+ startTime_all = startTime;
849
+ int frac,frac_prev;
850
+
851
+ // precompute common nodes
852
+ printf("stage 1 - precomputing common nodes\n");
853
+ frac_prev=-1;
854
+ for (int x=0;x<n;x++) {
855
+ frac = 100LL*x/n;
856
+ if (frac!=frac_prev) {
857
+ printf("%d%%\r",frac);
858
+ frac_prev=frac;
859
+ }
860
+ for (int n1=0;n1<deg[x];n1++) {
861
+ int a=adj[x][n1];
862
+ for (int n2=n1+1;n2<deg[x];n2++) {
863
+ int b=adj[x][n2];
864
+ PAIR ab=PAIR(a,b);
865
+ common2[ab]++;
866
+ for (int n3=n2+1;n3<deg[x];n3++) {
867
+ int c=adj[x][n3];
868
+ int st = adjacent(a,b)+adjacent(a,c)+adjacent(b,c);
869
+ if (st<2) continue;
870
+ TRIPLE abc=TRIPLE(a,b,c);
871
+ common3[abc]++;
872
+ }
873
+ }
874
+ }
875
+ }
876
+ // precompute triangles that span over edges
877
+ int *tri = (int*)calloc(m,sizeof(int));
878
+ for (int i=0;i<m;i++) {
879
+ int x=edges[i].a, y=edges[i].b;
880
+ for (int xi=0,yi=0; xi<deg[x] && yi<deg[y]; ) {
881
+ if (adj[x][xi]==adj[y][yi]) { tri[i]++; xi++; yi++; }
882
+ else if (adj[x][xi]<adj[y][yi]) { xi++; }
883
+ else { yi++; }
884
+ }
885
+ }
886
+ endTime = clock();
887
+ printf("%.2f sec\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
888
+ startTime = endTime;
889
+
890
+ // count full graphlets
891
+ printf("stage 2 - counting full graphlets\n");
892
+ int64 *C5 = (int64*)calloc(m,sizeof(int64));
893
+ int *neighx = (int*)malloc(n*sizeof(int)); // lookup table - edges to neighbors of x
894
+ memset(neighx,-1,n*sizeof(int));
895
+ int *neigh = (int*)malloc(n*sizeof(int)), nn; // lookup table - common neighbors of x and y
896
+ PII *neigh_edges = (PII*)malloc(n*sizeof(PII)); // list of common neighbors of x and y
897
+ int *neigh2 = (int*)malloc(n*sizeof(int)), nn2;
898
+ TIII *neigh2_edges = (TIII*)malloc(n*sizeof(TIII));
899
+ frac_prev=-1;
900
+ for (int x=0;x<n;x++) {
901
+ frac = 100LL*x/n;
902
+ if (frac!=frac_prev) {
903
+ printf("%d%%\r",frac);
904
+ frac_prev=frac;
905
+ }
906
+
907
+ for (int nx=0;nx<deg[x];nx++) {
908
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
909
+ neighx[y]=xy;
910
+ }
911
+ for (int nx=0;nx<deg[x];nx++) {
912
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
913
+ if (y >= x) break;
914
+ nn=0;
915
+ for (int ny=0;ny<deg[y];ny++) {
916
+ int z=inc[y][ny].first, yz=inc[y][ny].second;
917
+ if (z >= y) break;
918
+ if (neighx[z]==-1) continue;
919
+ int xz=neighx[z];
920
+ neigh[nn]=z;
921
+ neigh_edges[nn]={xz, yz};
922
+ nn++;
923
+ }
924
+ for (int i=0;i<nn;i++) {
925
+ int z = neigh[i], xz = neigh_edges[i].first, yz = neigh_edges[i].second;
926
+ nn2 = 0;
927
+ for (int j=i+1;j<nn;j++) {
928
+ int w = neigh[j], xw = neigh_edges[j].first, yw = neigh_edges[j].second;
929
+ if (adjacent(z,w)) {
930
+ neigh2[nn2]=w;
931
+ int zw=getEdgeId(z,w);
932
+ neigh2_edges[nn2]={xw,yw,zw};
933
+ nn2++;
934
+ }
935
+ }
936
+ for (int i2=0;i2<nn2;i2++) {
937
+ int z2 = neigh2[i2];
938
+ int z2x=neigh2_edges[i2].first, z2y=neigh2_edges[i2].second, z2z=neigh2_edges[i2].third;
939
+ for (int j2=i2+1;j2<nn2;j2++) {
940
+ int z3 = neigh2[j2];
941
+ int z3x=neigh2_edges[j2].first, z3y=neigh2_edges[j2].second, z3z=neigh2_edges[j2].third;
942
+ if (adjacent(z2,z3)) {
943
+ int zid=getEdgeId(z2,z3);
944
+ C5[xy]++; C5[xz]++; C5[yz]++;
945
+ C5[z2x]++; C5[z2y]++; C5[z2z]++;
946
+ C5[z3x]++; C5[z3y]++; C5[z3z]++;
947
+ C5[zid]++;
948
+ }
949
+ }
950
+ }
951
+ }
952
+ }
953
+ for (int nx=0;nx<deg[x];nx++) {
954
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
955
+ neighx[y]=-1;
956
+ }
957
+ }
958
+ endTime = clock();
959
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
960
+ startTime = endTime;
961
+
962
+ // set up a system of equations relating orbits for every node
963
+ printf("stage 3 - building systems of equations\n");
964
+ int *common_x = (int*)calloc(n,sizeof(int));
965
+ int *common_x_list = (int*)malloc(n*sizeof(int)), nc_x=0;
966
+ int *common_y = (int*)calloc(n,sizeof(int));
967
+ int *common_y_list = (int*)malloc(n*sizeof(int)), nc_y=0;
968
+ frac_prev=-1;
969
+
970
+ for (int x=0;x<n;x++) {
971
+ frac = 100LL*x/n;
972
+ if (frac!=frac_prev) {
973
+ printf("%d%%\r",frac);
974
+ frac_prev=frac;
975
+ }
976
+
977
+ // common nodes of x and some other node
978
+ for (int i=0;i<nc_x;i++) common_x[common_x_list[i]]=0;
979
+ nc_x=0;
980
+ for (int nx=0;nx<deg[x];nx++) {
981
+ int a=adj[x][nx];
982
+ for (int na=0;na<deg[a];na++) {
983
+ int z=adj[a][na];
984
+ if (z==x) continue;
985
+ if (common_x[z]==0) common_x_list[nc_x++]=z;
986
+ common_x[z]++;
987
+ }
988
+ }
989
+
990
+ for (int nx=0;nx<deg[x];nx++) {
991
+ int y=inc[x][nx].first, xy=inc[x][nx].second;
992
+ int e=xy;
993
+ if (y>=x) break;
994
+
995
+ // common nodes of y and some other node
996
+ for (int i=0;i<nc_y;i++) common_y[common_y_list[i]]=0;
997
+ nc_y=0;
998
+ for (int ny=0;ny<deg[y];ny++) {
999
+ int a=adj[y][ny];
1000
+ for (int na=0;na<deg[a];na++) {
1001
+ int z=adj[a][na];
1002
+ if (z==y) continue;
1003
+ if (common_y[z]==0) common_y_list[nc_y++]=z;
1004
+ common_y[z]++;
1005
+ }
1006
+ }
1007
+
1008
+ int64 f_66=0, f_65=0, f_62=0, f_61=0, f_60=0, f_51=0, f_50=0; // 11
1009
+ int64 f_64=0, f_58=0, f_55=0, f_48=0, f_41=0, f_35=0; // 10
1010
+ int64 f_63=0, f_59=0, f_57=0, f_54=0, f_53=0, f_52=0, f_47=0, f_40=0, f_39=0, f_34=0, f_33=0; // 9
1011
+ int64 f_45=0, f_36=0, f_26=0, f_23=0, f_19=0; // 7
1012
+ int64 f_49=0, f_38=0, f_37=0, f_32=0, f_25=0, f_22=0, f_18=0; // 6
1013
+ int64 f_56=0, f_46=0, f_44=0, f_43=0, f_42=0, f_31=0, f_30=0; // 5
1014
+ int64 f_27=0, f_17=0, f_15=0; // 4
1015
+ int64 f_20=0, f_16=0, f_13=0; // 3
1016
+ int64 f_29=0, f_28=0, f_24=0, f_21=0, f_14=0, f_12=0; // 2
1017
+
1018
+ // smaller (3-node) graphlets
1019
+ orbit[x][0] = deg[x];
1020
+ for (int nx1=0;nx1<deg[x];nx1++) {
1021
+ int z=adj[x][nx1];
1022
+ if (z==y) continue;
1023
+ if (adjacent(y,z)) eorbit[e][1]++;
1024
+ else eorbit[e][0]++;
1025
+ }
1026
+ for (int ny=0;ny<deg[y];ny++) {
1027
+ int z=adj[y][ny];
1028
+ if (z==x) continue;
1029
+ if (!adjacent(x,z)) eorbit[e][0]++;
1030
+ }
1031
+
1032
+ // edge-orbit 11 = (14,14)
1033
+ for (int nx1=0;nx1<deg[x];nx1++) {
1034
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1035
+ if (a==y || !adjacent(y,a)) continue;
1036
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1037
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1038
+ if (b==y || !adjacent(y,b) || !adjacent(a,b)) continue;
1039
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b), ab=getEdgeId(a,b);
1040
+ eorbit[e][11]++;
1041
+ f_66 += common3_get(TRIPLE(x,y,a))-1;
1042
+ f_66 += common3_get(TRIPLE(x,y,b))-1;
1043
+ f_65 += common3_get(TRIPLE(a,b,x))-1;
1044
+ f_65 += common3_get(TRIPLE(a,b,y))-1;
1045
+ f_62 += tri[xy]-2;
1046
+ f_61 += (tri[xa]-2)+(tri[xb]-2)+(tri[ya]-2)+(tri[yb]-2);
1047
+ f_60 += tri[ab]-2;
1048
+ f_51 += (deg[x]-3)+(deg[y]-3);
1049
+ f_50 += (deg[a]-3)+(deg[b]-3);
1050
+ }
1051
+ }
1052
+
1053
+ // edge-orbit 10 = (13,13)
1054
+ for (int nx1=0;nx1<deg[x];nx1++) {
1055
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
1056
+ if (a==y || !adjacent(y,a)) continue;
1057
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
1058
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
1059
+ if (b==y || !adjacent(y,b) || adjacent(a,b)) continue;
1060
+ int ya=getEdgeId(y,a), yb=getEdgeId(y,b);
1061
+ eorbit[e][10]++;
1062
+ f_64 += common3_get(TRIPLE(a,b,x))-1;
1063
+ f_64 += common3_get(TRIPLE(a,b,y))-1;
1064
+ f_58 += common2_get(PAIR(a,b))-2;
1065
+ f_55 += (tri[xa]-1)+(tri[xb]-1)+(tri[ya]-1)+(tri[yb]-1);
1066
+ f_48 += tri[xy]-2;
1067
+ f_41 += (deg[a]-2)+(deg[b]-2);
1068
+ f_35 += (deg[x]-3)+(deg[y]-3);
1069
+ }
1070
+ }
1071
+
1072
+ // edge-orbit 9 = (12,13)
1073
+ for (int nx=0;nx<deg[x];nx++) {
1074
+ int a=adj[x][nx], xa=inc[x][nx].second;
1075
+ if (a==y) continue;
1076
+ for (int ny=0;ny<deg[y];ny++) {
1077
+ int b=adj[y][ny], yb=inc[y][ny].second;
1078
+ if (b==x || !adjacent(a,b)) continue;
1079
+ int adj_ya=adjacent(y,a), adj_xb=adjacent(x,b);
1080
+ if (adj_ya+adj_xb!=1) continue;
1081
+ int ab=getEdgeId(a,b);
1082
+ eorbit[e][9]++;
1083
+ if (adj_xb) {
1084
+ int xb=getEdgeId(x,b);
1085
+ f_63 += common3_get(TRIPLE(a,b,y))-1;
1086
+ f_59 += common3_get(TRIPLE(a,b,x));
1087
+ f_57 += common_y[a]-2;
1088
+ f_54 += tri[yb]-1;
1089
+ f_53 += tri[xa]-1;
1090
+ f_47 += tri[xb]-2;
1091
+ f_40 += deg[y]-2;
1092
+ f_39 += deg[a]-2;
1093
+ f_34 += deg[x]-3;
1094
+ f_33 += deg[b]-3;
1095
+ } else if (adj_ya) {
1096
+ int ya=getEdgeId(y,a);
1097
+ f_63 += common3_get(TRIPLE(a,b,x))-1;
1098
+ f_59 += common3_get(TRIPLE(a,b,y));
1099
+ f_57 += common_x[b]-2;
1100
+ f_54 += tri[xa]-1;
1101
+ f_53 += tri[yb]-1;
1102
+ f_47 += tri[ya]-2;
1103
+ f_40 += deg[x]-2;
1104
+ f_39 += deg[b]-2;
1105
+ f_34 += deg[y]-3;
1106
+ f_33 += deg[a]-3;
1107
+ }
1108
+ f_52 += tri[ab]-1;
1109
+ }
1110
+ }
1111
+
1112
+ // edge-orbit 8 = (10,11)
1113
+ for (int nx=0;nx<deg[x];nx++) {
1114
+ int a=adj[x][nx];
1115
+ if (a==y || !adjacent(y,a)) continue;
1116
+ for (int nx1=0;nx1<deg[x];nx1++) {
1117
+ int b=adj[x][nx1];
1118
+ if (b==y || b==a || adjacent(y,b) || adjacent(a,b)) continue;
1119
+ eorbit[e][8]++;
1120
+ }
1121
+ for (int ny1=0;ny1<deg[y];ny1++) {
1122
+ int b=adj[y][ny1];
1123
+ if (b==x || b==a || adjacent(x,b) || adjacent(a,b)) continue;
1124
+ eorbit[e][8]++;
1125
+ }
1126
+ }
1127
+
1128
+ // edge-orbit 7 = (10,10)
1129
+ for (int nx=0;nx<deg[x];nx++) {
1130
+ int a=adj[x][nx];
1131
+ if (a==y || !adjacent(y,a)) continue;
1132
+ for (int na=0;na<deg[a];na++) {
1133
+ int b=adj[a][na], ab=inc[a][na].second;
1134
+ if (b==x || b==y || adjacent(x,b) || adjacent(y,b)) continue;
1135
+ eorbit[e][7]++;
1136
+ f_45 += common_x[b]-1;
1137
+ f_45 += common_y[b]-1;
1138
+ f_36 += tri[ab];
1139
+ f_26 += deg[a]-3;
1140
+ f_23 += deg[b]-1;
1141
+ f_19 += (deg[x]-2)+(deg[y]-2);
1142
+ }
1143
+ }
1144
+
1145
+ // edge-orbit 6 = (9,11)
1146
+ for (int ny1=0;ny1<deg[y];ny1++) {
1147
+ int a=adj[y][ny1], ya=inc[y][ny1].second;
1148
+ if (a==x || adjacent(x,a)) continue;
1149
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
1150
+ int b=adj[y][ny2], yb=inc[y][ny2].second;
1151
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
1152
+ int ab=getEdgeId(a,b);
1153
+ eorbit[e][6]++;
1154
+ f_49 += common3_get(TRIPLE(y,a,b));
1155
+ f_38 += tri[ab]-1;
1156
+ f_37 += tri[xy];
+ f_32 += (tri[ya]-1)+(tri[yb]-1);
+ f_25 += deg[y]-3;
+ f_22 += deg[x]-1;
+ f_18 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+ for (int nx1=0;nx1<deg[x];nx1++) {
+ int a=adj[x][nx1], xa=inc[x][nx1].second;
+ if (a==y || adjacent(y,a)) continue;
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
+ int b=adj[x][nx2], xb=inc[x][nx2].second;
+ if (b==y || adjacent(y,b) || !adjacent(a,b)) continue;
+ int ab=getEdgeId(a,b);
+ eorbit[e][6]++;
+ f_49 += common3_get(TRIPLE(x,a,b));
+ f_38 += tri[ab]-1;
+ f_37 += tri[xy];
+ f_32 += (tri[xa]-1)+(tri[xb]-1);
+ f_25 += deg[x]-3;
+ f_22 += deg[y]-1;
+ f_18 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+
+ // edge-orbit 5 = (8,8)
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx], xa=inc[x][nx].second;
+ if (a==y || adjacent(y,a)) continue;
+ for (int ny=0;ny<deg[y];ny++) {
+ int b=adj[y][ny], yb=inc[y][ny].second;
+ if (b==x || adjacent(x,b) || !adjacent(a,b)) continue;
+ int ab=getEdgeId(a,b);
+ eorbit[e][5]++;
+ f_56 += common3_get(TRIPLE(x,a,b));
+ f_56 += common3_get(TRIPLE(y,a,b));
+ f_46 += tri[xy];
+ f_44 += tri[xa]+tri[yb];
+ f_43 += tri[ab];
+ f_42 += common_x[b]-2;
+ f_42 += common_y[a]-2;
+ f_31 += (deg[x]-2)+(deg[y]-2);
+ f_30 += (deg[a]-2)+(deg[b]-2);
+ }
+ }
+
+ // edge-orbit 4 = (6,7)
+ for (int ny1=0;ny1<deg[y];ny1++) {
+ int a=adj[y][ny1];
+ if (a==x || adjacent(x,a)) continue;
+ for (int ny2=ny1+1;ny2<deg[y];ny2++) {
+ int b=adj[y][ny2];
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
+ eorbit[e][4]++;
+ f_27 += tri[xy];
+ f_17 += deg[y]-3;
+ f_15 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+ for (int nx1=0;nx1<deg[x];nx1++) {
+ int a=adj[x][nx1];
+ if (a==y || adjacent(y,a)) continue;
+ for (int nx2=nx1+1;nx2<deg[x];nx2++) {
+ int b=adj[x][nx2];
+ if (b==y || adjacent(y,b) || adjacent(a,b)) continue;
+ eorbit[e][4]++;
+ f_27 += tri[xy];
+ f_17 += deg[x]-3;
+ f_15 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+
+ // edge-orbit 3 = (5,5)
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx];
+ if (a==y || adjacent(y,a)) continue;
+ for (int ny=0;ny<deg[y];ny++) {
+ int b=adj[y][ny];
+ if (b==x || adjacent(x,b) || adjacent(a,b)) continue;
+ eorbit[e][3]++;
+ f_20 += tri[xy];
+ f_16 += (deg[x]-2)+(deg[y]-2);
+ f_13 += (deg[a]-1)+(deg[b]-1);
+ }
+ }
+
+ // edge-orbit 2 = (4,5)
+ for (int ny=0;ny<deg[y];ny++) {
+ int a=adj[y][ny];
+ if (a==x || adjacent(x,a)) continue;
+ for (int na=0;na<deg[a];na++) {
+ int b=adj[a][na], ab=inc[a][na].second;
+ if (b==y || adjacent(y,b) || adjacent(x,b)) continue;
+ eorbit[e][2]++;
+ f_29 += common_y[b]-1;
+ f_28 += common_x[b];
+ f_24 += tri[xy];
+ f_21 += tri[ab];
+ f_14 += deg[a]-2;
+ f_12 += deg[b]-1;
+ }
+ }
+ for (int nx=0;nx<deg[x];nx++) {
+ int a=adj[x][nx];
+ if (a==y || adjacent(y,a)) continue;
+ for (int na=0;na<deg[a];na++) {
+ int b=adj[a][na], ab=inc[a][na].second;
+ if (b==x || adjacent(x,b) || adjacent(y,b)) continue;
+ eorbit[e][2]++;
+ f_29 += common_x[b]-1;
+ f_28 += common_y[b];
+ f_24 += tri[xy];
+ f_21 += tri[ab];
+ f_14 += deg[a]-2;
+ f_12 += deg[b]-1;
+ }
+ }
+
+ // solve system of equations
+ eorbit[e][67]=C5[e];
+ eorbit[e][66]=(f_66-6*eorbit[e][67])/2;
+ eorbit[e][65]=(f_65-6*eorbit[e][67]);
+ eorbit[e][64]=(f_64-2*eorbit[e][66]);
+ eorbit[e][63]=(f_63-2*eorbit[e][65])/2;
+ eorbit[e][62]=(f_62-2*eorbit[e][66]-3*eorbit[e][67]);
+ eorbit[e][61]=(f_61-2*eorbit[e][65]-4*eorbit[e][66]-12*eorbit[e][67]);
+ eorbit[e][60]=(f_60-1*eorbit[e][65]-3*eorbit[e][67]);
+ eorbit[e][59]=(f_59-2*eorbit[e][65])/2;
+ eorbit[e][58]=(f_58-1*eorbit[e][64]-1*eorbit[e][66]);
+ eorbit[e][57]=(f_57-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][56]=(f_56-2*eorbit[e][63])/2;
+ eorbit[e][55]=(f_55-4*eorbit[e][62]-2*eorbit[e][64]-4*eorbit[e][66]);
+ eorbit[e][54]=(f_54-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65])/2;
+ eorbit[e][53]=(f_53-2*eorbit[e][59]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][52]=(f_52-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][65]);
+ eorbit[e][51]=(f_51-1*eorbit[e][61]-2*eorbit[e][62]-1*eorbit[e][65]-4*eorbit[e][66]-6*eorbit[e][67]);
+ eorbit[e][50]=(f_50-2*eorbit[e][60]-1*eorbit[e][61]-2*eorbit[e][65]-2*eorbit[e][66]-6*eorbit[e][67]);
+ eorbit[e][49]=(f_49-1*eorbit[e][59])/3;
+ eorbit[e][48]=(f_48-2*eorbit[e][62]-1*eorbit[e][66])/3;
+ eorbit[e][47]=(f_47-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][65])/2;
+ eorbit[e][46]=(f_46-1*eorbit[e][57]-1*eorbit[e][63]);
+ eorbit[e][45]=(f_45-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
+ eorbit[e][44]=(f_44-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][43]=(f_43-2*eorbit[e][56]-1*eorbit[e][63]);
+ eorbit[e][42]=(f_42-2*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63])/2;
+ eorbit[e][41]=(f_41-1*eorbit[e][55]-2*eorbit[e][58]-2*eorbit[e][62]-2*eorbit[e][64]-2*eorbit[e][66]);
+ eorbit[e][40]=(f_40-2*eorbit[e][54]-1*eorbit[e][55]-1*eorbit[e][57]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][39]=(f_39-1*eorbit[e][52]-1*eorbit[e][53]-1*eorbit[e][57]-2*eorbit[e][59]-2*eorbit[e][63]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][38]=(f_38-3*eorbit[e][49]-1*eorbit[e][56]-1*eorbit[e][59]);
+ eorbit[e][37]=(f_37-1*eorbit[e][53]-1*eorbit[e][59]);
+ eorbit[e][36]=(f_36-1*eorbit[e][52]-2*eorbit[e][60])/2;
+ eorbit[e][35]=(f_35-6*eorbit[e][48]-1*eorbit[e][55]-4*eorbit[e][62]-1*eorbit[e][64]-2*eorbit[e][66]);
+ eorbit[e][34]=(f_34-2*eorbit[e][47]-1*eorbit[e][53]-1*eorbit[e][55]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][64]-2*eorbit[e][65]);
+ eorbit[e][33]=(f_33-2*eorbit[e][47]-1*eorbit[e][52]-2*eorbit[e][54]-2*eorbit[e][59]-1*eorbit[e][61]-2*eorbit[e][63]-2*eorbit[e][65]);
+ eorbit[e][32]=(f_32-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][59])/2;
+ eorbit[e][31]=(f_31-2*eorbit[e][42]-1*eorbit[e][44]-2*eorbit[e][46]-2*eorbit[e][56]-2*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][30]=(f_30-2*eorbit[e][42]-2*eorbit[e][43]-1*eorbit[e][44]-4*eorbit[e][56]-1*eorbit[e][57]-2*eorbit[e][63]);
+ eorbit[e][29]=(f_29-2*eorbit[e][38]-1*eorbit[e][45]-1*eorbit[e][52])/2;
+ eorbit[e][28]=(f_28-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52])/2;
+ eorbit[e][27]=(f_27-1*eorbit[e][34]-1*eorbit[e][47]);
+ eorbit[e][26]=(f_26-1*eorbit[e][33]-2*eorbit[e][36]-1*eorbit[e][50]-1*eorbit[e][52]-2*eorbit[e][60])/2;
+ eorbit[e][25]=(f_25-2*eorbit[e][32]-1*eorbit[e][37]-3*eorbit[e][49]-1*eorbit[e][53]-1*eorbit[e][59]);
+ eorbit[e][24]=(f_24-1*eorbit[e][39]-1*eorbit[e][45]-1*eorbit[e][52]);
+ eorbit[e][23]=(f_23-2*eorbit[e][36]-1*eorbit[e][45]-1*eorbit[e][52]-2*eorbit[e][58]-2*eorbit[e][60]);
+ eorbit[e][22]=(f_22-1*eorbit[e][37]-1*eorbit[e][44]-1*eorbit[e][53]-1*eorbit[e][56]-1*eorbit[e][59]);
+ eorbit[e][21]=(f_21-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][52])/2;
+ eorbit[e][20]=(f_20-1*eorbit[e][40]-1*eorbit[e][54]);
+ eorbit[e][19]=(f_19-1*eorbit[e][33]-2*eorbit[e][41]-1*eorbit[e][45]-2*eorbit[e][50]-1*eorbit[e][52]-4*eorbit[e][58]-4*eorbit[e][60]);
+ eorbit[e][18]=(f_18-2*eorbit[e][32]-2*eorbit[e][38]-1*eorbit[e][44]-6*eorbit[e][49]-1*eorbit[e][53]-2*eorbit[e][56]-2*eorbit[e][59]);
+ eorbit[e][17]=(f_17-2*eorbit[e][25]-1*eorbit[e][27]-1*eorbit[e][32]-1*eorbit[e][34]-1*eorbit[e][47])/3;
+ eorbit[e][16]=(f_16-2*eorbit[e][20]-2*eorbit[e][22]-1*eorbit[e][31]-2*eorbit[e][40]-1*eorbit[e][44]-2*eorbit[e][54])/2;
+ eorbit[e][15]=(f_15-2*eorbit[e][25]-2*eorbit[e][29]-1*eorbit[e][31]-2*eorbit[e][32]-1*eorbit[e][34]-2*eorbit[e][42]-2*eorbit[e][47]);
+ eorbit[e][14]=(f_14-1*eorbit[e][18]-2*eorbit[e][21]-1*eorbit[e][30]-2*eorbit[e][38]-1*eorbit[e][39]-2*eorbit[e][43]-1*eorbit[e][52])/2;
+ eorbit[e][13]=(f_13-2*eorbit[e][22]-2*eorbit[e][28]-1*eorbit[e][31]-1*eorbit[e][40]-2*eorbit[e][44]-2*eorbit[e][54]);
+ eorbit[e][12]=(f_12-2*eorbit[e][21]-2*eorbit[e][28]-2*eorbit[e][29]-2*eorbit[e][38]-2*eorbit[e][43]-1*eorbit[e][45]-1*eorbit[e][52]);
+ }
+ }
+
+ endTime = clock();
+ printf("%.2f\n", (double)(endTime-startTime)/CLOCKS_PER_SEC);
+
+ endTime_all = endTime;
+ printf("total: %.2f\n", (double)(endTime_all-startTime_all)/CLOCKS_PER_SEC);
+ }
+
+ fstream fin, fout; // input and output files
+ int GS=5;
+ string orbit_type;
+
+ int motif_counts(char* orbit_type, int graphlet_size, const char* input_filename, const char* output_filename) {
+ // open input, output files
+ if (strcmp(orbit_type, "node")!=0 && strcmp(orbit_type, "edge")!=0) {
+ cerr << "Incorrect orbit type '" << orbit_type << "'. Should be 'node' or 'edge'." << endl;
+ return 0;
+ }
+ if (graphlet_size!=4 && graphlet_size!=5) {
+ cerr << "Incorrect graphlet size " << graphlet_size << ". Should be 4 or 5." << endl;
+ return 0;
+ }
+ fin.open(input_filename, fstream::in);
+ fout.open(output_filename, fstream::out | fstream::binary);
+ if (fin.fail()) {
+ cerr << "Failed to open file " << input_filename << endl;
+ return 0;
+ }
+ if (fout.fail()) {
+ cerr << "Failed to open file " << output_filename << endl;
+ return 0;
+ }
+ // read input graph
+ fin >> n >> m;
+ int d_max=0;
+ edges = (PAIR*)malloc(m*sizeof(PAIR));
+ deg = (int*)calloc(n,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a,b;
+ fin >> a >> b;
+ if (!(0<=a && a<n) || !(0<=b && b<n)) {
+ cerr << "Node ids should be between 0 and n-1." << endl;
+ return 0;
+ }
+ if (a==b) {
+ cerr << "Self loops (edge from x to x) are not allowed." << endl;
+ return 0;
+ }
+ deg[a]++; deg[b]++;
+ edges[i]=PAIR(a,b);
+ }
+ for (int i=0;i<n;i++) d_max=max(d_max,deg[i]);
+ printf("nodes: %d\n",n);
+ printf("edges: %d\n",m);
+ printf("max degree: %d\n",d_max);
+ fin.close();
+ if ((int)(set<PAIR>(edges,edges+m).size())!=m) {
+ cerr << "Input file contains duplicate undirected edges." << endl;
+ return 0;
+ }
+ // set up adjacency matrix if it's smaller than 100MB
+ if ((int64)n*n < 100LL*1024*1024*8) {
+ adjacent = adjacent_matrix;
+ adj_matrix = (int*)calloc((n*n)/adj_chunk+1,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a=edges[i].a, b=edges[i].b;
+ adj_matrix[(a*n+b)/adj_chunk]|=(1<<((a*n+b)%adj_chunk));
+ adj_matrix[(b*n+a)/adj_chunk]|=(1<<((b*n+a)%adj_chunk));
+ }
+ } else {
+ adjacent = adjacent_list;
+ }
+ // set up adjacency, incidence lists
+ adj = (int**)malloc(n*sizeof(int*));
+ for (int i=0;i<n;i++) adj[i] = (int*)malloc(deg[i]*sizeof(int));
+ inc = (PII**)malloc(n*sizeof(PII*));
+ for (int i=0;i<n;i++) inc[i] = (PII*)malloc(deg[i]*sizeof(PII));
+ int *d = (int*)calloc(n,sizeof(int));
+ for (int i=0;i<m;i++) {
+ int a=edges[i].a, b=edges[i].b;
+ adj[a][d[a]]=b; adj[b][d[b]]=a;
+ inc[a][d[a]]=PII(b,i); inc[b][d[b]]=PII(a,i);
+ d[a]++; d[b]++;
+ }
+ for (int i=0;i<n;i++) {
+ sort(adj[i],adj[i]+deg[i]);
+ sort(inc[i],inc[i]+deg[i]);
+ }
+ // initialize orbit counts
+ orbit = (int64**)malloc(n*sizeof(int64*));
+ for (int i=0;i<n;i++) orbit[i] = (int64*)calloc(73,sizeof(int64));
+ // initialize edge orbit counts
+ eorbit = (int64**)malloc(m*sizeof(int64*));
+ for (int i=0;i<m;i++) eorbit[i] = (int64*)calloc(68,sizeof(int64));
+ return 1;
+ }
+
+ int init(int argc, char *argv[]) {
+ if (argc!=5) {
+ cerr << "Incorrect number of arguments." << endl;
+ cerr << "Usage: orca.exe [orbit type: node|edge] [graphlet size: 4/5] [graph - input file] [graphlets - output file]" << endl;
+ return 0;
+ }
+ int graphlet_size;
+ sscanf(argv[2],"%d", &graphlet_size);
+ return motif_counts(argv[1], graphlet_size, argv[3], argv[4]);
+ }
+
+ void writeResults(int g=5) {
+ int no[] = {0,0,1,4,15,73};
+ for (int i=0;i<n;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << orbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ }
+
+ void writeEdgeResults(int g=5) {
+ int no[] = {0,0,0,2,12,68};
+ for (int i=0;i<m;i++) {
+ for (int j=0;j<no[g];j++) {
+ if (j!=0) fout << " ";
+ fout << eorbit[i][j];
+ }
+ fout << endl;
+ }
+ fout.close();
+ }
+
+
+ //int main(int argc, char *argv[]) {
+ //
+ //
+ // if (!init(argc, argv)) {
+ // cerr << "Stopping!" << endl;
+ // return 0;
+ // }
+ // if (orbit_type=="node") {
+ // printf("Counting NODE orbits of graphlets on %d nodes.\n\n",GS);
+ // if (GS==4) count4();
+ // if (GS==5) count5();
+ // writeResults(GS);
+ // } else {
+ // printf("Counting EDGE orbits of graphlets on %d nodes.\n\n",GS);
+ // if (GS==4) ecount4();
+ // if (GS==5) ecount5();
+ // writeEdgeResults(GS);
+ // }
+ //
+ //
+ // return 0;
+ //}
+
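Note: the two scratch files that follow (tmp_JJOX0U87.txt and tmp_YX4O2JRL.txt) are inputs in the plain edge-list format that motif_counts above reads: a header line "n m" (node and edge counts), then m lines of 0-indexed endpoint pairs, with no self-loops and no duplicate undirected edges. A minimal sketch of producing such a file from a networkx graph is given below; the helper name and the use of networkx are illustrative assumptions, not part of this repository.

import networkx as nx

def write_orca_input(graph: nx.Graph, path: str) -> None:
    # motif_counts expects node ids 0..n-1, so relabel first (illustrative helper, not repo code).
    graph = nx.convert_node_labels_to_integers(graph)
    with open(path, "w") as f:
        # header: node count and edge count
        f.write(f"{graph.number_of_nodes()} {graph.number_of_edges()}\n")
        # one undirected edge per line
        for u, v in graph.edges():
            f.write(f"{u} {v}\n")

For example, tmp_JJOX0U87.txt below starts with "9 24", i.e. a 9-node graph with 24 edges.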
analysis/orca/tmp_JJOX0U87.txt ADDED
@@ -0,0 +1,25 @@
+ 9 24
+ 0 1
+ 0 2
+ 0 5
+ 0 6
+ 0 7
+ 1 4
+ 1 6
+ 1 8
+ 2 3
+ 2 4
+ 2 6
+ 2 8
+ 3 4
+ 3 5
+ 3 7
+ 4 5
+ 4 7
+ 4 8
+ 5 6
+ 5 7
+ 5 8
+ 6 7
+ 6 8
+ 7 8
analysis/orca/tmp_YX4O2JRL.txt ADDED
@@ -0,0 +1,3269 @@
1
+ 217 3268
2
+ 0 1
3
+ 0 2
4
+ 0 4
5
+ 0 8
6
+ 0 9
7
+ 0 11
8
+ 0 13
9
+ 0 16
10
+ 0 17
11
+ 0 30
12
+ 0 33
13
+ 0 35
14
+ 0 36
15
+ 0 42
16
+ 0 47
17
+ 0 50
18
+ 0 51
19
+ 0 52
20
+ 0 53
21
+ 0 54
22
+ 0 55
23
+ 0 56
24
+ 0 57
25
+ 0 58
26
+ 1 2
27
+ 1 3
28
+ 1 4
29
+ 1 7
30
+ 1 8
31
+ 1 9
32
+ 1 10
33
+ 1 11
34
+ 1 12
35
+ 1 13
36
+ 1 14
37
+ 1 15
38
+ 1 16
39
+ 1 17
40
+ 1 18
41
+ 1 19
42
+ 1 20
43
+ 1 21
44
+ 1 22
45
+ 1 23
46
+ 1 24
47
+ 1 25
48
+ 1 27
49
+ 1 28
50
+ 1 29
51
+ 1 30
52
+ 1 32
53
+ 1 33
54
+ 1 34
55
+ 1 35
56
+ 1 36
57
+ 1 37
58
+ 1 38
59
+ 1 39
60
+ 1 40
61
+ 1 41
62
+ 1 42
63
+ 1 43
64
+ 1 46
65
+ 1 47
66
+ 1 48
67
+ 1 49
68
+ 1 50
69
+ 1 51
70
+ 1 52
71
+ 1 53
72
+ 1 54
73
+ 1 55
74
+ 1 57
75
+ 1 62
76
+ 1 64
77
+ 1 69
78
+ 1 80
79
+ 1 95
80
+ 1 97
81
+ 1 98
82
+ 1 99
83
+ 1 100
84
+ 1 101
85
+ 1 103
86
+ 1 104
87
+ 1 105
88
+ 1 106
89
+ 1 107
90
+ 1 108
91
+ 1 111
92
+ 1 112
93
+ 1 115
94
+ 1 116
95
+ 1 117
96
+ 1 118
97
+ 1 120
98
+ 1 122
99
+ 1 124
100
+ 1 127
101
+ 1 128
102
+ 1 137
103
+ 1 162
104
+ 1 164
105
+ 1 165
106
+ 1 197
107
+ 2 3
108
+ 2 4
109
+ 2 5
110
+ 2 8
111
+ 2 9
112
+ 2 10
113
+ 2 11
114
+ 2 12
115
+ 2 13
116
+ 2 14
117
+ 2 16
118
+ 2 17
119
+ 2 18
120
+ 2 19
121
+ 2 20
122
+ 2 21
123
+ 2 22
124
+ 2 23
125
+ 2 24
126
+ 2 25
127
+ 2 27
128
+ 2 28
129
+ 2 29
130
+ 2 30
131
+ 2 31
132
+ 2 33
133
+ 2 35
134
+ 2 36
135
+ 2 37
136
+ 2 38
137
+ 2 39
138
+ 2 41
139
+ 2 42
140
+ 2 43
141
+ 2 44
142
+ 2 45
143
+ 2 46
144
+ 2 47
145
+ 2 48
146
+ 2 49
147
+ 2 50
148
+ 2 51
149
+ 2 52
150
+ 2 53
151
+ 2 54
152
+ 2 55
153
+ 2 56
154
+ 2 57
155
+ 2 59
156
+ 2 60
157
+ 2 62
158
+ 2 63
159
+ 2 64
160
+ 2 65
161
+ 2 66
162
+ 2 69
163
+ 2 80
164
+ 2 95
165
+ 2 96
166
+ 2 97
167
+ 2 98
168
+ 2 99
169
+ 2 100
170
+ 2 101
171
+ 2 103
172
+ 2 104
173
+ 2 105
174
+ 2 106
175
+ 2 107
176
+ 2 108
177
+ 2 110
178
+ 2 111
179
+ 2 112
180
+ 2 113
181
+ 2 114
182
+ 2 115
183
+ 2 116
184
+ 2 117
185
+ 2 118
186
+ 2 119
187
+ 2 120
188
+ 2 121
189
+ 2 122
190
+ 2 123
191
+ 2 126
192
+ 2 127
193
+ 2 128
194
+ 2 129
195
+ 2 131
196
+ 2 135
197
+ 2 136
198
+ 2 137
199
+ 2 154
200
+ 2 158
201
+ 2 162
202
+ 2 164
203
+ 2 175
204
+ 2 202
205
+ 3 6
206
+ 3 7
207
+ 3 8
208
+ 3 11
209
+ 3 13
210
+ 3 14
211
+ 3 15
212
+ 3 16
213
+ 3 17
214
+ 3 18
215
+ 3 19
216
+ 3 20
217
+ 3 25
218
+ 3 26
219
+ 3 27
220
+ 3 29
221
+ 3 31
222
+ 3 32
223
+ 3 33
224
+ 3 34
225
+ 3 35
226
+ 3 37
227
+ 3 38
228
+ 3 39
229
+ 3 40
230
+ 3 41
231
+ 3 43
232
+ 3 44
233
+ 3 46
234
+ 3 47
235
+ 3 48
236
+ 3 49
237
+ 3 50
238
+ 3 51
239
+ 3 52
240
+ 3 54
241
+ 3 55
242
+ 3 56
243
+ 3 57
244
+ 3 69
245
+ 3 80
246
+ 3 95
247
+ 3 99
248
+ 3 100
249
+ 3 101
250
+ 3 102
251
+ 3 103
252
+ 3 104
253
+ 3 105
254
+ 3 107
255
+ 3 108
256
+ 3 111
257
+ 3 112
258
+ 3 114
259
+ 3 115
260
+ 3 116
261
+ 3 118
262
+ 3 121
263
+ 3 122
264
+ 3 124
265
+ 3 126
266
+ 3 128
267
+ 3 129
268
+ 3 130
269
+ 3 131
270
+ 3 136
271
+ 4 5
272
+ 4 7
273
+ 4 8
274
+ 4 9
275
+ 4 10
276
+ 4 11
277
+ 4 15
278
+ 4 16
279
+ 4 17
280
+ 4 20
281
+ 4 21
282
+ 4 25
283
+ 4 27
284
+ 4 28
285
+ 4 30
286
+ 4 32
287
+ 4 33
288
+ 4 34
289
+ 4 35
290
+ 4 36
291
+ 4 37
292
+ 4 38
293
+ 4 39
294
+ 4 41
295
+ 4 43
296
+ 4 44
297
+ 4 45
298
+ 4 46
299
+ 4 47
300
+ 4 49
301
+ 4 50
302
+ 4 51
303
+ 4 53
304
+ 4 54
305
+ 4 55
306
+ 4 56
307
+ 4 57
308
+ 4 59
309
+ 4 60
310
+ 4 61
311
+ 4 63
312
+ 4 65
313
+ 4 67
314
+ 4 69
315
+ 4 70
316
+ 4 80
317
+ 4 85
318
+ 4 87
319
+ 4 91
320
+ 4 92
321
+ 4 93
322
+ 4 94
323
+ 4 97
324
+ 4 98
325
+ 4 101
326
+ 4 103
327
+ 4 104
328
+ 4 105
329
+ 4 106
330
+ 4 107
331
+ 4 108
332
+ 4 110
333
+ 4 111
334
+ 4 112
335
+ 4 115
336
+ 4 116
337
+ 4 117
338
+ 4 118
339
+ 4 119
340
+ 4 120
341
+ 4 122
342
+ 4 123
343
+ 4 127
344
+ 4 128
345
+ 4 129
346
+ 4 130
347
+ 4 137
348
+ 4 138
349
+ 4 139
350
+ 4 140
351
+ 4 142
352
+ 4 146
353
+ 4 147
354
+ 4 148
355
+ 4 150
356
+ 4 151
357
+ 4 152
358
+ 4 154
359
+ 4 155
360
+ 4 156
361
+ 4 157
362
+ 4 164
363
+ 4 175
364
+ 4 176
365
+ 4 196
366
+ 4 197
367
+ 4 202
368
+ 5 8
369
+ 5 11
370
+ 5 13
371
+ 5 16
372
+ 5 17
373
+ 5 20
374
+ 5 25
375
+ 5 27
376
+ 5 30
377
+ 5 32
378
+ 5 33
379
+ 5 35
380
+ 5 36
381
+ 5 37
382
+ 5 38
383
+ 5 42
384
+ 5 43
385
+ 5 45
386
+ 5 46
387
+ 5 47
388
+ 5 50
389
+ 5 51
390
+ 5 53
391
+ 5 54
392
+ 5 55
393
+ 5 56
394
+ 5 57
395
+ 5 60
396
+ 5 64
397
+ 5 88
398
+ 5 98
399
+ 5 99
400
+ 5 101
401
+ 5 104
402
+ 5 105
403
+ 5 106
404
+ 5 107
405
+ 5 110
406
+ 5 116
407
+ 5 117
408
+ 5 119
409
+ 5 120
410
+ 5 122
411
+ 5 123
412
+ 5 137
413
+ 5 164
414
+ 5 202
415
+ 5 204
416
+ 6 7
417
+ 6 15
418
+ 6 16
419
+ 6 20
420
+ 6 21
421
+ 6 22
422
+ 6 23
423
+ 6 28
424
+ 6 29
425
+ 6 31
426
+ 6 32
427
+ 6 33
428
+ 6 34
429
+ 6 37
430
+ 6 39
431
+ 6 41
432
+ 6 51
433
+ 6 101
434
+ 6 103
435
+ 6 116
436
+ 6 131
437
+ 6 135
438
+ 6 136
439
+ 7 8
440
+ 7 10
441
+ 7 11
442
+ 7 12
443
+ 7 13
444
+ 7 14
445
+ 7 16
446
+ 7 17
447
+ 7 18
448
+ 7 19
449
+ 7 20
450
+ 7 21
451
+ 7 22
452
+ 7 23
453
+ 7 24
454
+ 7 25
455
+ 7 26
456
+ 7 27
457
+ 7 28
458
+ 7 29
459
+ 7 30
460
+ 7 31
461
+ 7 33
462
+ 7 35
463
+ 7 36
464
+ 7 37
465
+ 7 38
466
+ 7 39
467
+ 7 40
468
+ 7 41
469
+ 7 42
470
+ 7 43
471
+ 7 46
472
+ 7 47
473
+ 7 48
474
+ 7 49
475
+ 7 50
476
+ 7 51
477
+ 7 52
478
+ 7 55
479
+ 7 56
480
+ 7 57
481
+ 7 60
482
+ 7 63
483
+ 7 64
484
+ 7 65
485
+ 7 69
486
+ 7 97
487
+ 7 101
488
+ 7 103
489
+ 7 104
490
+ 7 105
491
+ 7 108
492
+ 7 112
493
+ 7 115
494
+ 7 116
495
+ 7 118
496
+ 7 122
497
+ 7 124
498
+ 7 126
499
+ 7 127
500
+ 7 129
501
+ 7 130
502
+ 7 131
503
+ 7 135
504
+ 7 136
505
+ 7 137
506
+ 8 9
507
+ 8 10
508
+ 8 11
509
+ 8 12
510
+ 8 13
511
+ 8 16
512
+ 8 17
513
+ 8 20
514
+ 8 21
515
+ 8 22
516
+ 8 25
517
+ 8 27
518
+ 8 28
519
+ 8 30
520
+ 8 32
521
+ 8 33
522
+ 8 35
523
+ 8 36
524
+ 8 37
525
+ 8 38
526
+ 8 39
527
+ 8 41
528
+ 8 42
529
+ 8 43
530
+ 8 44
531
+ 8 45
532
+ 8 46
533
+ 8 47
534
+ 8 48
535
+ 8 49
536
+ 8 50
537
+ 8 51
538
+ 8 53
539
+ 8 54
540
+ 8 55
541
+ 8 56
542
+ 8 57
543
+ 8 60
544
+ 8 62
545
+ 8 64
546
+ 8 66
547
+ 8 68
548
+ 8 95
549
+ 8 97
550
+ 8 98
551
+ 8 99
552
+ 8 101
553
+ 8 102
554
+ 8 104
555
+ 8 105
556
+ 8 106
557
+ 8 107
558
+ 8 108
559
+ 8 110
560
+ 8 115
561
+ 8 116
562
+ 8 117
563
+ 8 119
564
+ 8 120
565
+ 8 123
566
+ 8 126
567
+ 8 137
568
+ 8 164
569
+ 8 165
570
+ 8 175
571
+ 8 177
572
+ 9 10
573
+ 9 13
574
+ 9 16
575
+ 9 17
576
+ 9 20
577
+ 9 25
578
+ 9 28
579
+ 9 30
580
+ 9 33
581
+ 9 35
582
+ 9 36
583
+ 9 37
584
+ 9 38
585
+ 9 39
586
+ 9 42
587
+ 9 47
588
+ 9 49
589
+ 9 53
590
+ 9 55
591
+ 9 56
592
+ 9 57
593
+ 9 58
594
+ 9 62
595
+ 9 64
596
+ 9 101
597
+ 9 108
598
+ 9 110
599
+ 9 116
600
+ 9 117
601
+ 9 118
602
+ 9 120
603
+ 9 127
604
+ 9 137
605
+ 10 12
606
+ 10 13
607
+ 10 14
608
+ 10 16
609
+ 10 17
610
+ 10 18
611
+ 10 19
612
+ 10 20
613
+ 10 25
614
+ 10 30
615
+ 10 32
616
+ 10 33
617
+ 10 36
618
+ 10 37
619
+ 10 38
620
+ 10 39
621
+ 10 41
622
+ 10 42
623
+ 10 46
624
+ 10 47
625
+ 10 48
626
+ 10 51
627
+ 10 53
628
+ 10 55
629
+ 10 56
630
+ 10 57
631
+ 10 62
632
+ 10 64
633
+ 10 95
634
+ 10 97
635
+ 10 101
636
+ 10 103
637
+ 10 116
638
+ 10 126
639
+ 10 127
640
+ 10 165
641
+ 10 208
642
+ 11 13
643
+ 11 15
644
+ 11 16
645
+ 11 17
646
+ 11 18
647
+ 11 20
648
+ 11 21
649
+ 11 22
650
+ 11 23
651
+ 11 25
652
+ 11 28
653
+ 11 30
654
+ 11 33
655
+ 11 35
656
+ 11 36
657
+ 11 37
658
+ 11 38
659
+ 11 41
660
+ 11 42
661
+ 11 43
662
+ 11 45
663
+ 11 46
664
+ 11 47
665
+ 11 48
666
+ 11 49
667
+ 11 50
668
+ 11 51
669
+ 11 53
670
+ 11 54
671
+ 11 55
672
+ 11 56
673
+ 11 57
674
+ 11 97
675
+ 11 99
676
+ 11 103
677
+ 11 104
678
+ 11 105
679
+ 11 106
680
+ 11 107
681
+ 11 108
682
+ 11 110
683
+ 11 112
684
+ 11 117
685
+ 11 119
686
+ 11 120
687
+ 11 123
688
+ 11 131
689
+ 11 164
690
+ 11 175
691
+ 12 15
692
+ 12 16
693
+ 12 21
694
+ 12 23
695
+ 12 25
696
+ 12 33
697
+ 12 35
698
+ 12 36
699
+ 12 37
700
+ 12 38
701
+ 12 43
702
+ 12 47
703
+ 12 49
704
+ 12 50
705
+ 12 51
706
+ 12 53
707
+ 12 56
708
+ 12 97
709
+ 12 101
710
+ 12 104
711
+ 12 106
712
+ 12 110
713
+ 12 117
714
+ 13 15
715
+ 13 16
716
+ 13 17
717
+ 13 18
718
+ 13 20
719
+ 13 21
720
+ 13 22
721
+ 13 23
722
+ 13 25
723
+ 13 27
724
+ 13 28
725
+ 13 30
726
+ 13 32
727
+ 13 33
728
+ 13 34
729
+ 13 35
730
+ 13 36
731
+ 13 37
732
+ 13 38
733
+ 13 39
734
+ 13 41
735
+ 13 43
736
+ 13 44
737
+ 13 45
738
+ 13 46
739
+ 13 47
740
+ 13 48
741
+ 13 49
742
+ 13 50
743
+ 13 51
744
+ 13 52
745
+ 13 53
746
+ 13 54
747
+ 13 55
748
+ 13 56
749
+ 13 57
750
+ 13 60
751
+ 13 61
752
+ 13 63
753
+ 13 65
754
+ 13 69
755
+ 13 80
756
+ 13 87
757
+ 13 95
758
+ 13 97
759
+ 13 98
760
+ 13 99
761
+ 13 100
762
+ 13 101
763
+ 13 103
764
+ 13 104
765
+ 13 105
766
+ 13 106
767
+ 13 107
768
+ 13 108
769
+ 13 110
770
+ 13 111
771
+ 13 112
772
+ 13 115
773
+ 13 116
774
+ 13 117
775
+ 13 118
776
+ 13 119
777
+ 13 120
778
+ 13 121
779
+ 13 122
780
+ 13 123
781
+ 13 126
782
+ 13 127
783
+ 13 128
784
+ 13 137
785
+ 13 154
786
+ 13 155
787
+ 13 156
788
+ 13 157
789
+ 13 164
790
+ 13 175
791
+ 13 196
792
+ 14 15
793
+ 14 16
794
+ 14 19
795
+ 14 20
796
+ 14 21
797
+ 14 29
798
+ 14 30
799
+ 14 31
800
+ 14 32
801
+ 14 34
802
+ 14 37
803
+ 14 51
804
+ 14 57
805
+ 14 101
806
+ 14 103
807
+ 14 116
808
+ 14 118
809
+ 14 122
810
+ 14 126
811
+ 14 127
812
+ 14 162
813
+ 15 16
814
+ 15 17
815
+ 15 18
816
+ 15 19
817
+ 15 20
818
+ 15 21
819
+ 15 22
820
+ 15 23
821
+ 15 24
822
+ 15 25
823
+ 15 26
824
+ 15 27
825
+ 15 28
826
+ 15 29
827
+ 15 30
828
+ 15 31
829
+ 15 33
830
+ 15 35
831
+ 15 37
832
+ 15 38
833
+ 15 39
834
+ 15 40
835
+ 15 41
836
+ 15 42
837
+ 15 43
838
+ 15 46
839
+ 15 47
840
+ 15 49
841
+ 15 50
842
+ 15 51
843
+ 15 54
844
+ 15 56
845
+ 15 60
846
+ 15 63
847
+ 15 64
848
+ 15 68
849
+ 15 69
850
+ 15 97
851
+ 15 99
852
+ 15 101
853
+ 15 102
854
+ 15 103
855
+ 15 104
856
+ 15 105
857
+ 15 108
858
+ 15 112
859
+ 15 114
860
+ 15 115
861
+ 15 116
862
+ 15 118
863
+ 15 122
864
+ 15 126
865
+ 15 127
866
+ 15 128
867
+ 15 129
868
+ 15 130
869
+ 15 131
870
+ 15 135
871
+ 15 136
872
+ 15 158
873
+ 16 17
874
+ 16 18
875
+ 16 19
876
+ 16 20
877
+ 16 21
878
+ 16 22
879
+ 16 23
880
+ 16 24
881
+ 16 25
882
+ 16 26
883
+ 16 27
884
+ 16 28
885
+ 16 29
886
+ 16 30
887
+ 16 31
888
+ 16 32
889
+ 16 33
890
+ 16 34
891
+ 16 35
892
+ 16 36
893
+ 16 37
894
+ 16 38
895
+ 16 39
896
+ 16 40
897
+ 16 41
898
+ 16 42
899
+ 16 43
900
+ 16 44
901
+ 16 45
902
+ 16 46
903
+ 16 47
904
+ 16 48
905
+ 16 49
906
+ 16 50
907
+ 16 51
908
+ 16 52
909
+ 16 53
910
+ 16 54
911
+ 16 55
912
+ 16 56
913
+ 16 57
914
+ 16 60
915
+ 16 62
916
+ 16 64
917
+ 16 65
918
+ 16 66
919
+ 16 68
920
+ 16 69
921
+ 16 70
922
+ 16 71
923
+ 16 73
924
+ 16 74
925
+ 16 80
926
+ 16 81
927
+ 16 87
928
+ 16 91
929
+ 16 92
930
+ 16 93
931
+ 16 94
932
+ 16 95
933
+ 16 96
934
+ 16 97
935
+ 16 98
936
+ 16 99
937
+ 16 100
938
+ 16 101
939
+ 16 102
940
+ 16 103
941
+ 16 104
942
+ 16 105
943
+ 16 106
944
+ 16 107
945
+ 16 108
946
+ 16 109
947
+ 16 110
948
+ 16 111
949
+ 16 112
950
+ 16 113
951
+ 16 114
952
+ 16 115
953
+ 16 116
954
+ 16 117
955
+ 16 118
956
+ 16 119
957
+ 16 120
958
+ 16 121
959
+ 16 122
960
+ 16 123
961
+ 16 124
962
+ 16 126
963
+ 16 127
964
+ 16 128
965
+ 16 129
966
+ 16 131
967
+ 16 133
968
+ 16 134
969
+ 16 137
970
+ 16 138
971
+ 16 139
972
+ 16 140
973
+ 16 142
974
+ 16 145
975
+ 16 146
976
+ 16 147
977
+ 16 148
978
+ 16 149
979
+ 16 150
980
+ 16 151
981
+ 16 152
982
+ 16 154
983
+ 16 160
984
+ 16 162
985
+ 16 164
986
+ 16 165
987
+ 16 175
988
+ 16 176
989
+ 16 177
990
+ 16 183
991
+ 16 197
992
+ 16 202
993
+ 16 204
994
+ 16 214
995
+ 17 18
996
+ 17 19
997
+ 17 20
998
+ 17 21
999
+ 17 22
1000
+ 17 23
1001
+ 17 25
1002
+ 17 27
1003
+ 17 28
1004
+ 17 29
1005
+ 17 30
1006
+ 17 31
1007
+ 17 32
1008
+ 17 33
1009
+ 17 34
1010
+ 17 35
1011
+ 17 36
1012
+ 17 37
1013
+ 17 38
1014
+ 17 39
1015
+ 17 40
1016
+ 17 41
1017
+ 17 42
1018
+ 17 43
1019
+ 17 47
1020
+ 17 50
1021
+ 17 51
1022
+ 17 53
1023
+ 17 54
1024
+ 17 55
1025
+ 17 56
1026
+ 17 57
1027
+ 17 60
1028
+ 17 62
1029
+ 17 64
1030
+ 17 95
1031
+ 17 97
1032
+ 17 98
1033
+ 17 99
1034
+ 17 101
1035
+ 17 103
1036
+ 17 104
1037
+ 17 105
1038
+ 17 106
1039
+ 17 110
1040
+ 17 112
1041
+ 17 115
1042
+ 17 116
1043
+ 17 117
1044
+ 17 118
1045
+ 17 122
1046
+ 17 127
1047
+ 17 128
1048
+ 17 129
1049
+ 17 131
1050
+ 17 137
1051
+ 17 164
1052
+ 17 165
1053
+ 17 197
1054
+ 18 21
1055
+ 18 22
1056
+ 18 23
1057
+ 18 25
1058
+ 18 28
1059
+ 18 30
1060
+ 18 32
1061
+ 18 33
1062
+ 18 35
1063
+ 18 36
1064
+ 18 38
1065
+ 18 43
1066
+ 18 46
1067
+ 18 47
1068
+ 18 48
1069
+ 18 49
1070
+ 18 53
1071
+ 18 55
1072
+ 18 57
1073
+ 18 95
1074
+ 18 97
1075
+ 18 104
1076
+ 18 106
1077
+ 18 107
1078
+ 18 108
1079
+ 19 21
1080
+ 19 22
1081
+ 19 23
1082
+ 19 25
1083
+ 19 27
1084
+ 19 28
1085
+ 19 30
1086
+ 19 32
1087
+ 19 33
1088
+ 19 34
1089
+ 19 35
1090
+ 19 38
1091
+ 19 40
1092
+ 19 41
1093
+ 19 47
1094
+ 19 50
1095
+ 19 51
1096
+ 19 101
1097
+ 19 103
1098
+ 19 112
1099
+ 19 116
1100
+ 19 126
1101
+ 19 129
1102
+ 19 158
1103
+ 19 168
1104
+ 19 170
1105
+ 20 21
1106
+ 20 22
1107
+ 20 23
1108
+ 20 25
1109
+ 20 27
1110
+ 20 28
1111
+ 20 30
1112
+ 20 31
1113
+ 20 32
1114
+ 20 33
1115
+ 20 34
1116
+ 20 35
1117
+ 20 36
1118
+ 20 38
1119
+ 20 39
1120
+ 20 40
1121
+ 20 41
1122
+ 20 42
1123
+ 20 43
1124
+ 20 44
1125
+ 20 47
1126
+ 20 50
1127
+ 20 51
1128
+ 20 53
1129
+ 20 54
1130
+ 20 55
1131
+ 20 56
1132
+ 20 57
1133
+ 20 59
1134
+ 20 60
1135
+ 20 62
1136
+ 20 63
1137
+ 20 64
1138
+ 20 65
1139
+ 20 66
1140
+ 20 69
1141
+ 20 80
1142
+ 20 85
1143
+ 20 87
1144
+ 20 97
1145
+ 20 98
1146
+ 20 99
1147
+ 20 100
1148
+ 20 101
1149
+ 20 103
1150
+ 20 104
1151
+ 20 105
1152
+ 20 106
1153
+ 20 110
1154
+ 20 111
1155
+ 20 116
1156
+ 20 117
1157
+ 20 120
1158
+ 20 121
1159
+ 20 123
1160
+ 20 126
1161
+ 20 131
1162
+ 20 137
1163
+ 20 161
1164
+ 21 24
1165
+ 21 25
1166
+ 21 26
1167
+ 21 27
1168
+ 21 29
1169
+ 21 31
1170
+ 21 32
1171
+ 21 33
1172
+ 21 34
1173
+ 21 35
1174
+ 21 36
1175
+ 21 37
1176
+ 21 38
1177
+ 21 39
1178
+ 21 40
1179
+ 21 41
1180
+ 21 42
1181
+ 21 43
1182
+ 21 44
1183
+ 21 46
1184
+ 21 47
1185
+ 21 48
1186
+ 21 49
1187
+ 21 50
1188
+ 21 51
1189
+ 21 52
1190
+ 21 54
1191
+ 21 55
1192
+ 21 57
1193
+ 21 64
1194
+ 21 69
1195
+ 21 80
1196
+ 21 96
1197
+ 21 97
1198
+ 21 99
1199
+ 21 100
1200
+ 21 101
1201
+ 21 103
1202
+ 21 104
1203
+ 21 105
1204
+ 21 107
1205
+ 21 108
1206
+ 21 111
1207
+ 21 112
1208
+ 21 113
1209
+ 21 114
1210
+ 21 115
1211
+ 21 116
1212
+ 21 117
1213
+ 21 118
1214
+ 21 121
1215
+ 21 122
1216
+ 21 124
1217
+ 21 126
1218
+ 21 128
1219
+ 21 129
1220
+ 21 130
1221
+ 21 131
1222
+ 21 135
1223
+ 21 136
1224
+ 21 164
1225
+ 22 25
1226
+ 22 26
1227
+ 22 27
1228
+ 22 29
1229
+ 22 31
1230
+ 22 32
1231
+ 22 33
1232
+ 22 34
1233
+ 22 35
1234
+ 22 37
1235
+ 22 38
1236
+ 22 39
1237
+ 22 40
1238
+ 22 41
1239
+ 22 43
1240
+ 22 44
1241
+ 22 46
1242
+ 22 47
1243
+ 22 48
1244
+ 22 49
1245
+ 22 50
1246
+ 22 51
1247
+ 22 55
1248
+ 22 57
1249
+ 22 64
1250
+ 22 69
1251
+ 22 80
1252
+ 22 95
1253
+ 22 97
1254
+ 22 99
1255
+ 22 100
1256
+ 22 101
1257
+ 22 103
1258
+ 22 104
1259
+ 22 105
1260
+ 22 107
1261
+ 22 108
1262
+ 22 111
1263
+ 22 112
1264
+ 22 114
1265
+ 22 115
1266
+ 22 116
1267
+ 22 117
1268
+ 22 121
1269
+ 22 122
1270
+ 22 126
1271
+ 22 128
1272
+ 22 129
1273
+ 22 131
1274
+ 22 136
1275
+ 23 24
1276
+ 23 25
1277
+ 23 26
1278
+ 23 27
1279
+ 23 29
1280
+ 23 31
1281
+ 23 32
1282
+ 23 33
1283
+ 23 34
1284
+ 23 35
1285
+ 23 36
1286
+ 23 37
1287
+ 23 38
1288
+ 23 39
1289
+ 23 40
1290
+ 23 41
1291
+ 23 43
1292
+ 23 46
1293
+ 23 47
1294
+ 23 48
1295
+ 23 50
1296
+ 23 51
1297
+ 23 54
1298
+ 23 59
1299
+ 23 63
1300
+ 23 65
1301
+ 23 68
1302
+ 23 69
1303
+ 23 80
1304
+ 23 83
1305
+ 23 85
1306
+ 23 97
1307
+ 23 99
1308
+ 23 100
1309
+ 23 101
1310
+ 23 102
1311
+ 23 103
1312
+ 23 104
1313
+ 23 105
1314
+ 23 107
1315
+ 23 108
1316
+ 23 111
1317
+ 23 112
1318
+ 23 114
1319
+ 23 115
1320
+ 23 116
1321
+ 23 117
1322
+ 23 118
1323
+ 23 121
1324
+ 23 122
1325
+ 23 124
1326
+ 23 126
1327
+ 23 127
1328
+ 23 128
1329
+ 23 129
1330
+ 23 130
1331
+ 23 131
1332
+ 23 135
1333
+ 23 136
1334
+ 23 158
1335
+ 23 162
1336
+ 24 27
1337
+ 24 28
1338
+ 24 29
1339
+ 24 31
1340
+ 24 32
1341
+ 24 34
1342
+ 24 35
1343
+ 24 39
1344
+ 24 40
1345
+ 24 43
1346
+ 24 51
1347
+ 24 59
1348
+ 24 63
1349
+ 24 65
1350
+ 24 69
1351
+ 24 80
1352
+ 24 85
1353
+ 24 97
1354
+ 24 101
1355
+ 24 103
1356
+ 24 111
1357
+ 24 114
1358
+ 24 115
1359
+ 24 116
1360
+ 24 122
1361
+ 24 126
1362
+ 24 130
1363
+ 24 161
1364
+ 25 27
1365
+ 25 28
1366
+ 25 29
1367
+ 25 30
1368
+ 25 31
1369
+ 25 32
1370
+ 25 33
1371
+ 25 34
1372
+ 25 35
1373
+ 25 36
1374
+ 25 37
1375
+ 25 38
1376
+ 25 39
1377
+ 25 40
1378
+ 25 41
1379
+ 25 42
1380
+ 25 43
1381
+ 25 44
1382
+ 25 45
1383
+ 25 46
1384
+ 25 47
1385
+ 25 48
1386
+ 25 49
1387
+ 25 50
1388
+ 25 51
1389
+ 25 53
1390
+ 25 54
1391
+ 25 55
1392
+ 25 56
1393
+ 25 57
1394
+ 25 60
1395
+ 25 62
1396
+ 25 63
1397
+ 25 64
1398
+ 25 65
1399
+ 25 66
1400
+ 25 68
1401
+ 25 69
1402
+ 25 71
1403
+ 25 80
1404
+ 25 87
1405
+ 25 91
1406
+ 25 92
1407
+ 25 94
1408
+ 25 97
1409
+ 25 98
1410
+ 25 99
1411
+ 25 100
1412
+ 25 102
1413
+ 25 103
1414
+ 25 104
1415
+ 25 105
1416
+ 25 106
1417
+ 25 107
1418
+ 25 108
1419
+ 25 110
1420
+ 25 111
1421
+ 25 112
1422
+ 25 114
1423
+ 25 115
1424
+ 25 116
1425
+ 25 117
1426
+ 25 118
1427
+ 25 119
1428
+ 25 120
1429
+ 25 121
1430
+ 25 122
1431
+ 25 123
1432
+ 25 124
1433
+ 25 126
1434
+ 25 127
1435
+ 25 128
1436
+ 25 129
1437
+ 25 131
1438
+ 25 137
1439
+ 25 139
1440
+ 25 150
1441
+ 25 151
1442
+ 25 152
1443
+ 25 154
1444
+ 25 164
1445
+ 25 165
1446
+ 25 175
1447
+ 25 176
1448
+ 25 202
1449
+ 25 204
1450
+ 25 214
1451
+ 26 28
1452
+ 26 31
1453
+ 26 32
1454
+ 26 33
1455
+ 26 34
1456
+ 26 38
1457
+ 26 39
1458
+ 26 41
1459
+ 26 48
1460
+ 26 51
1461
+ 26 68
1462
+ 26 112
1463
+ 26 131
1464
+ 26 135
1465
+ 26 136
1466
+ 27 28
1467
+ 27 30
1468
+ 27 32
1469
+ 27 33
1470
+ 27 34
1471
+ 27 35
1472
+ 27 37
1473
+ 27 38
1474
+ 27 41
1475
+ 27 42
1476
+ 27 43
1477
+ 27 46
1478
+ 27 47
1479
+ 27 50
1480
+ 27 51
1481
+ 27 64
1482
+ 27 80
1483
+ 27 97
1484
+ 27 98
1485
+ 27 101
1486
+ 27 102
1487
+ 27 103
1488
+ 27 105
1489
+ 27 106
1490
+ 27 107
1491
+ 27 108
1492
+ 27 111
1493
+ 27 112
1494
+ 27 115
1495
+ 27 116
1496
+ 27 118
1497
+ 27 122
1498
+ 27 126
1499
+ 27 127
1500
+ 27 128
1501
+ 27 129
1502
+ 27 131
1503
+ 28 29
1504
+ 28 31
1505
+ 28 32
1506
+ 28 33
1507
+ 28 34
1508
+ 28 35
1509
+ 28 36
1510
+ 28 37
1511
+ 28 38
1512
+ 28 39
1513
+ 28 40
1514
+ 28 41
1515
+ 28 42
1516
+ 28 43
1517
+ 28 44
1518
+ 28 46
1519
+ 28 47
1520
+ 28 48
1521
+ 28 49
1522
+ 28 50
1523
+ 28 51
1524
+ 28 52
1525
+ 28 54
1526
+ 28 55
1527
+ 28 56
1528
+ 28 57
1529
+ 28 64
1530
+ 28 68
1531
+ 28 69
1532
+ 28 80
1533
+ 28 95
1534
+ 28 96
1535
+ 28 97
1536
+ 28 99
1537
+ 28 100
1538
+ 28 101
1539
+ 28 102
1540
+ 28 103
1541
+ 28 104
1542
+ 28 105
1543
+ 28 107
1544
+ 28 108
1545
+ 28 111
1546
+ 28 112
1547
+ 28 113
1548
+ 28 114
1549
+ 28 115
1550
+ 28 116
1551
+ 28 117
1552
+ 28 120
1553
+ 28 121
1554
+ 28 122
1555
+ 28 124
1556
+ 28 126
1557
+ 28 128
1558
+ 28 129
1559
+ 28 131
1560
+ 28 135
1561
+ 28 136
1562
+ 28 164
1563
+ 28 175
1564
+ 29 30
1565
+ 29 31
1566
+ 29 32
1567
+ 29 33
1568
+ 29 34
1569
+ 29 35
1570
+ 29 36
1571
+ 29 39
1572
+ 29 40
1573
+ 29 41
1574
+ 29 47
1575
+ 29 50
1576
+ 29 51
1577
+ 29 101
1578
+ 29 103
1579
+ 29 112
1580
+ 29 115
1581
+ 29 116
1582
+ 29 126
1583
+ 29 129
1584
+ 29 158
1585
+ 29 168
1586
+ 30 32
1587
+ 30 33
1588
+ 30 35
1589
+ 30 36
1590
+ 30 37
1591
+ 30 38
1592
+ 30 39
1593
+ 30 41
1594
+ 30 42
1595
+ 30 43
1596
+ 30 45
1597
+ 30 47
1598
+ 30 50
1599
+ 30 51
1600
+ 30 53
1601
+ 30 54
1602
+ 30 55
1603
+ 30 56
1604
+ 30 57
1605
+ 30 59
1606
+ 30 60
1607
+ 30 62
1608
+ 30 63
1609
+ 30 64
1610
+ 30 65
1611
+ 30 68
1612
+ 30 69
1613
+ 30 80
1614
+ 30 85
1615
+ 30 87
1616
+ 30 95
1617
+ 30 97
1618
+ 30 98
1619
+ 30 101
1620
+ 30 103
1621
+ 30 104
1622
+ 30 105
1623
+ 30 106
1624
+ 30 107
1625
+ 30 108
1626
+ 30 110
1627
+ 30 111
1628
+ 30 112
1629
+ 30 115
1630
+ 30 116
1631
+ 30 117
1632
+ 30 118
1633
+ 30 119
1634
+ 30 120
1635
+ 30 122
1636
+ 30 123
1637
+ 30 126
1638
+ 30 127
1639
+ 30 128
1640
+ 30 129
1641
+ 30 137
1642
+ 30 139
1643
+ 30 158
1644
+ 30 164
1645
+ 30 165
1646
+ 30 202
1647
+ 31 32
1648
+ 31 34
1649
+ 31 35
1650
+ 31 37
1651
+ 31 38
1652
+ 31 39
1653
+ 31 40
1654
+ 31 41
1655
+ 31 46
1656
+ 31 47
1657
+ 31 50
1658
+ 31 55
1659
+ 31 57
1660
+ 31 69
1661
+ 31 101
1662
+ 31 103
1663
+ 31 112
1664
+ 31 116
1665
+ 31 118
1666
+ 31 122
1667
+ 31 126
1668
+ 31 127
1669
+ 31 129
1670
+ 31 131
1671
+ 31 162
1672
+ 32 33
1673
+ 32 35
1674
+ 32 36
1675
+ 32 37
1676
+ 32 38
1677
+ 32 39
1678
+ 32 40
1679
+ 32 41
1680
+ 32 42
1681
+ 32 43
1682
+ 32 46
1683
+ 32 47
1684
+ 32 50
1685
+ 32 51
1686
+ 32 53
1687
+ 32 54
1688
+ 32 55
1689
+ 32 56
1690
+ 32 57
1691
+ 32 62
1692
+ 32 64
1693
+ 32 65
1694
+ 32 68
1695
+ 32 69
1696
+ 32 80
1697
+ 32 97
1698
+ 32 101
1699
+ 32 102
1700
+ 32 103
1701
+ 32 104
1702
+ 32 105
1703
+ 32 107
1704
+ 32 111
1705
+ 32 112
1706
+ 32 116
1707
+ 32 118
1708
+ 32 122
1709
+ 32 126
1710
+ 32 127
1711
+ 32 128
1712
+ 32 129
1713
+ 32 131
1714
+ 32 135
1715
+ 32 136
1716
+ 32 137
1717
+ 32 158
1718
+ 32 202
1719
+ 33 34
1720
+ 33 35
1721
+ 33 36
1722
+ 33 37
1723
+ 33 38
1724
+ 33 39
1725
+ 33 40
1726
+ 33 41
1727
+ 33 42
1728
+ 33 43
1729
+ 33 44
1730
+ 33 45
1731
+ 33 46
1732
+ 33 47
1733
+ 33 48
1734
+ 33 49
1735
+ 33 51
1736
+ 33 52
1737
+ 33 53
1738
+ 33 54
1739
+ 33 55
1740
+ 33 56
1741
+ 33 57
1742
+ 33 60
1743
+ 33 62
1744
+ 33 64
1745
+ 33 66
1746
+ 33 68
1747
+ 33 80
1748
+ 33 97
1749
+ 33 98
1750
+ 33 99
1751
+ 33 103
1752
+ 33 104
1753
+ 33 105
1754
+ 33 106
1755
+ 33 107
1756
+ 33 108
1757
+ 33 110
1758
+ 33 111
1759
+ 33 112
1760
+ 33 115
1761
+ 33 116
1762
+ 33 117
1763
+ 33 118
1764
+ 33 119
1765
+ 33 120
1766
+ 33 121
1767
+ 33 123
1768
+ 33 125
1769
+ 33 126
1770
+ 33 127
1771
+ 33 128
1772
+ 33 131
1773
+ 33 137
1774
+ 33 154
1775
+ 33 164
1776
+ 33 175
1777
+ 33 197
1778
+ 33 202
1779
+ 34 35
1780
+ 34 37
1781
+ 34 38
1782
+ 34 39
1783
+ 34 40
1784
+ 34 41
1785
+ 34 42
1786
+ 34 43
1787
+ 34 46
1788
+ 34 47
1789
+ 34 50
1790
+ 34 51
1791
+ 34 56
1792
+ 34 101
1793
+ 34 103
1794
+ 34 104
1795
+ 34 112
1796
+ 34 115
1797
+ 34 116
1798
+ 34 122
1799
+ 34 127
1800
+ 34 131
1801
+ 34 135
1802
+ 34 136
1803
+ 35 36
1804
+ 35 37
1805
+ 35 38
1806
+ 35 40
1807
+ 35 41
1808
+ 35 42
1809
+ 35 43
1810
+ 35 44
1811
+ 35 45
1812
+ 35 46
1813
+ 35 47
1814
+ 35 48
1815
+ 35 49
1816
+ 35 50
1817
+ 35 51
1818
+ 35 52
1819
+ 35 53
1820
+ 35 54
1821
+ 35 55
1822
+ 35 56
1823
+ 35 57
1824
+ 35 60
1825
+ 35 62
1826
+ 35 64
1827
+ 35 65
1828
+ 35 66
1829
+ 35 68
1830
+ 35 69
1831
+ 35 80
1832
+ 35 87
1833
+ 35 95
1834
+ 35 97
1835
+ 35 98
1836
+ 35 99
1837
+ 35 100
1838
+ 35 101
1839
+ 35 102
1840
+ 35 103
1841
+ 35 104
1842
+ 35 105
1843
+ 35 106
1844
+ 35 107
1845
+ 35 109
1846
+ 35 110
1847
+ 35 111
1848
+ 35 112
1849
+ 35 113
1850
+ 35 114
1851
+ 35 115
1852
+ 35 116
1853
+ 35 117
1854
+ 35 118
1855
+ 35 119
1856
+ 35 120
1857
+ 35 122
1858
+ 35 124
1859
+ 35 125
1860
+ 35 126
1861
+ 35 127
1862
+ 35 128
1863
+ 35 130
1864
+ 35 137
1865
+ 35 162
1866
+ 35 164
1867
+ 35 197
1868
+ 36 37
1869
+ 36 38
1870
+ 36 39
1871
+ 36 41
1872
+ 36 42
1873
+ 36 43
1874
+ 36 44
1875
+ 36 45
1876
+ 36 46
1877
+ 36 47
1878
+ 36 48
1879
+ 36 49
1880
+ 36 50
1881
+ 36 51
1882
+ 36 52
1883
+ 36 53
1884
+ 36 54
1885
+ 36 55
1886
+ 36 56
1887
+ 36 57
1888
+ 36 60
1889
+ 36 62
1890
+ 36 63
1891
+ 36 64
1892
+ 36 65
1893
+ 36 66
1894
+ 36 68
1895
+ 36 69
1896
+ 36 70
1897
+ 36 71
1898
+ 36 73
1899
+ 36 76
1900
+ 36 77
1901
+ 36 80
1902
+ 36 81
1903
+ 36 85
1904
+ 36 87
1905
+ 36 91
1906
+ 36 92
1907
+ 36 93
1908
+ 36 94
1909
+ 36 95
1910
+ 36 97
1911
+ 36 98
1912
+ 36 99
1913
+ 36 100
1914
+ 36 101
1915
+ 36 102
1916
+ 36 104
1917
+ 36 105
1918
+ 36 106
1919
+ 36 107
1920
+ 36 108
1921
+ 36 110
1922
+ 36 111
1923
+ 36 112
1924
+ 36 115
1925
+ 36 116
1926
+ 36 117
1927
+ 36 118
1928
+ 36 120
1929
+ 36 123
1930
+ 36 124
1931
+ 36 125
1932
+ 36 128
1933
+ 36 137
1934
+ 36 138
1935
+ 36 139
1936
+ 36 140
1937
+ 36 142
1938
+ 36 145
1939
+ 36 146
1940
+ 36 147
1941
+ 36 148
1942
+ 36 149
1943
+ 36 150
1944
+ 36 151
1945
+ 36 152
1946
+ 36 154
1947
+ 36 160
1948
+ 36 164
1949
+ 36 175
1950
+ 36 176
1951
+ 36 177
1952
+ 36 204
1953
+ 36 214
1954
+ 37 38
1955
+ 37 39
1956
+ 37 40
1957
+ 37 41
1958
+ 37 42
1959
+ 37 43
1960
+ 37 44
1961
+ 37 47
1962
+ 37 50
1963
+ 37 51
1964
+ 37 53
1965
+ 37 54
1966
+ 37 55
1967
+ 37 56
1968
+ 37 57
1969
+ 37 59
1970
+ 37 60
1971
+ 37 62
1972
+ 37 63
1973
+ 37 64
1974
+ 37 65
1975
+ 37 66
1976
+ 37 69
1977
+ 37 80
1978
+ 37 85
1979
+ 37 97
1980
+ 37 98
1981
+ 37 99
1982
+ 37 101
1983
+ 37 103
1984
+ 37 104
1985
+ 37 105
1986
+ 37 106
1987
+ 37 107
1988
+ 37 110
1989
+ 37 111
1990
+ 37 116
1991
+ 37 117
1992
+ 37 118
1993
+ 37 120
1994
+ 37 121
1995
+ 37 122
1996
+ 37 123
1997
+ 37 126
1998
+ 37 127
1999
+ 37 128
2000
+ 37 130
2001
+ 37 131
2002
+ 37 137
2003
+ 37 161
2004
+ 37 164
2005
+ 37 202
2006
+ 38 39
2007
+ 38 40
2008
+ 38 41
2009
+ 38 42
2010
+ 38 43
2011
+ 38 45
2012
+ 38 46
2013
+ 38 47
2014
+ 38 48
2015
+ 38 49
2016
+ 38 50
2017
+ 38 51
2018
+ 38 52
2019
+ 38 53
2020
+ 38 54
2021
+ 38 55
2022
+ 38 56
2023
+ 38 57
2024
+ 38 62
2025
+ 38 64
2026
+ 38 66
2027
+ 38 87
2028
+ 38 95
2029
+ 38 97
2030
+ 38 99
2031
+ 38 102
2032
+ 38 103
2033
+ 38 104
2034
+ 38 105
2035
+ 38 106
2036
+ 38 107
2037
+ 38 108
2038
+ 38 112
2039
+ 38 115
2040
+ 38 116
2041
+ 38 117
2042
+ 38 118
2043
+ 38 119
2044
+ 38 120
2045
+ 38 121
2046
+ 38 123
2047
+ 38 125
2048
+ 38 126
2049
+ 38 127
2050
+ 38 128
2051
+ 38 131
2052
+ 38 164
2053
+ 38 175
2054
+ 38 197
2055
+ 38 202
2056
+ 39 40
2057
+ 39 42
2058
+ 39 47
2059
+ 39 51
2060
+ 39 55
2061
+ 39 57
2062
+ 39 101
2063
+ 39 103
2064
+ 39 112
2065
+ 39 116
2066
+ 39 118
2067
+ 39 122
2068
+ 39 126
2069
+ 39 127
2070
+ 39 131
2071
+ 39 137
2072
+ 39 162
2073
+ 40 41
2074
+ 40 46
2075
+ 40 48
2076
+ 40 51
2077
+ 40 57
2078
+ 40 59
2079
+ 40 63
2080
+ 40 67
2081
+ 40 68
2082
+ 40 83
2083
+ 40 102
2084
+ 40 103
2085
+ 40 104
2086
+ 40 112
2087
+ 40 122
2088
+ 40 128
2089
+ 40 131
2090
+ 40 135
2091
+ 40 136
2092
+ 41 42
2093
+ 41 46
2094
+ 41 47
2095
+ 41 48
2096
+ 41 50
2097
+ 41 51
2098
+ 41 55
2099
+ 41 57
2100
+ 41 69
2101
+ 41 95
2102
+ 41 97
2103
+ 41 101
2104
+ 41 103
2105
+ 41 104
2106
+ 41 115
2107
+ 41 116
2108
+ 41 120
2109
+ 41 122
2110
+ 41 124
2111
+ 41 127
2112
+ 41 131
2113
+ 41 135
2114
+ 41 164
2115
+ 42 43
2116
+ 42 44
2117
+ 42 45
2118
+ 42 46
2119
+ 42 47
2120
+ 42 48
2121
+ 42 49
2122
+ 42 50
2123
+ 42 51
2124
+ 42 53
2125
+ 42 54
2126
+ 42 55
2127
+ 42 56
2128
+ 42 57
2129
+ 42 59
2130
+ 42 60
2131
+ 42 61
2132
+ 42 63
2133
+ 42 65
2134
+ 42 69
2135
+ 42 82
2136
+ 42 87
2137
+ 42 89
2138
+ 42 91
2139
+ 42 97
2140
+ 42 98
2141
+ 42 99
2142
+ 42 100
2143
+ 42 101
2144
+ 42 102
2145
+ 42 103
2146
+ 42 104
2147
+ 42 105
2148
+ 42 106
2149
+ 42 107
2150
+ 42 108
2151
+ 42 109
2152
+ 42 110
2153
+ 42 111
2154
+ 42 112
2155
+ 42 115
2156
+ 42 116
2157
+ 42 117
2158
+ 42 118
2159
+ 42 119
2160
+ 42 120
2161
+ 42 121
2162
+ 42 122
2163
+ 42 123
2164
+ 42 126
2165
+ 42 128
2166
+ 42 137
2167
+ 42 154
2168
+ 42 156
2169
+ 42 157
2170
+ 42 160
2171
+ 42 164
2172
+ 42 175
2173
+ 42 196
2174
+ 42 197
2175
+ 43 44
2176
+ 43 45
2177
+ 43 46
2178
+ 43 47
2179
+ 43 48
2180
+ 43 49
2181
+ 43 50
2182
+ 43 51
2183
+ 43 53
2184
+ 43 54
2185
+ 43 56
2186
+ 43 62
2187
+ 43 64
2188
+ 43 66
2189
+ 43 68
2190
+ 43 69
2191
+ 43 80
2192
+ 43 95
2193
+ 43 97
2194
+ 43 98
2195
+ 43 99
2196
+ 43 100
2197
+ 43 102
2198
+ 43 105
2199
+ 43 106
2200
+ 43 107
2201
+ 43 108
2202
+ 43 109
2203
+ 43 110
2204
+ 43 111
2205
+ 43 112
2206
+ 43 113
2207
+ 43 114
2208
+ 43 115
2209
+ 43 117
2210
+ 43 118
2211
+ 43 119
2212
+ 43 120
2213
+ 43 122
2214
+ 43 123
2215
+ 43 124
2216
+ 43 126
2217
+ 43 128
2218
+ 43 137
2219
+ 43 139
2220
+ 43 144
2221
+ 43 164
2222
+ 43 202
2223
+ 44 45
2224
+ 44 46
2225
+ 44 47
2226
+ 44 48
2227
+ 44 49
2228
+ 44 55
2229
+ 44 57
2230
+ 44 68
2231
+ 44 80
2232
+ 44 92
2233
+ 44 97
2234
+ 44 99
2235
+ 44 102
2236
+ 44 104
2237
+ 44 105
2238
+ 44 107
2239
+ 44 108
2240
+ 44 109
2241
+ 44 110
2242
+ 44 111
2243
+ 44 112
2244
+ 44 117
2245
+ 44 120
2246
+ 44 123
2247
+ 44 124
2248
+ 44 128
2249
+ 44 137
2250
+ 44 139
2251
+ 44 150
2252
+ 44 151
2253
+ 44 154
2254
+ 44 175
2255
+ 45 46
2256
+ 45 47
2257
+ 45 48
2258
+ 45 49
2259
+ 45 53
2260
+ 45 54
2261
+ 45 55
2262
+ 45 57
2263
+ 45 60
2264
+ 45 64
2265
+ 45 68
2266
+ 45 80
2267
+ 45 92
2268
+ 45 97
2269
+ 45 99
2270
+ 45 104
2271
+ 45 105
2272
+ 45 106
2273
+ 45 107
2274
+ 45 109
2275
+ 45 111
2276
+ 45 117
2277
+ 45 119
2278
+ 45 120
2279
+ 45 123
2280
+ 45 124
2281
+ 45 128
2282
+ 45 139
2283
+ 45 150
2284
+ 45 151
2285
+ 45 152
2286
+ 45 175
2287
+ 45 176
2288
+ 45 204
2289
+ 46 47
2290
+ 46 48
2291
+ 46 49
2292
+ 46 51
2293
+ 46 53
2294
+ 46 54
2295
+ 46 55
2296
+ 46 56
2297
+ 46 57
2298
+ 46 60
2299
+ 46 64
2300
+ 46 66
2301
+ 46 68
2302
+ 46 80
2303
+ 46 95
2304
+ 46 97
2305
+ 46 98
2306
+ 46 99
2307
+ 46 100
2308
+ 46 103
2309
+ 46 104
2310
+ 46 105
2311
+ 46 106
2312
+ 46 107
2313
+ 46 108
2314
+ 46 109
2315
+ 46 110
2316
+ 46 111
2317
+ 46 112
2318
+ 46 114
2319
+ 46 115
2320
+ 46 117
2321
+ 46 118
2322
+ 46 119
2323
+ 46 120
2324
+ 46 122
2325
+ 46 123
2326
+ 46 124
2327
+ 46 128
2328
+ 46 131
2329
+ 46 137
2330
+ 46 154
2331
+ 46 175
2332
+ 47 48
2333
+ 47 49
2334
+ 47 50
2335
+ 47 52
2336
+ 47 53
2337
+ 47 54
2338
+ 47 55
2339
+ 47 56
2340
+ 47 57
2341
+ 47 60
2342
+ 47 62
2343
+ 47 64
2344
+ 47 66
2345
+ 47 68
2346
+ 47 69
2347
+ 47 80
2348
+ 47 95
2349
+ 47 97
2350
+ 47 98
2351
+ 47 99
2352
+ 47 100
2353
+ 47 103
2354
+ 47 104
2355
+ 47 105
2356
+ 47 106
2357
+ 47 107
2358
+ 47 108
2359
+ 47 110
2360
+ 47 111
2361
+ 47 112
2362
+ 47 114
2363
+ 47 115
2364
+ 47 117
2365
+ 47 118
2366
+ 47 120
2367
+ 47 121
2368
+ 47 123
2369
+ 47 124
2370
+ 47 127
2371
+ 47 128
2372
+ 47 130
2373
+ 47 137
2374
+ 47 139
2375
+ 47 145
2376
+ 47 154
2377
+ 47 160
2378
+ 47 164
2379
+ 47 165
2380
+ 47 175
2381
+ 47 183
2382
+ 47 197
2383
+ 47 202
2384
+ 47 204
2385
+ 47 214
2386
+ 48 49
2387
+ 48 50
2388
+ 48 55
2389
+ 48 56
2390
+ 48 67
2391
+ 48 68
2392
+ 48 95
2393
+ 48 97
2394
+ 48 98
2395
+ 48 99
2396
+ 48 100
2397
+ 48 102
2398
+ 48 104
2399
+ 48 105
2400
+ 48 106
2401
+ 48 107
2402
+ 48 108
2403
+ 48 112
2404
+ 48 114
2405
+ 48 115
2406
+ 48 118
2407
+ 48 119
2408
+ 48 120
2409
+ 48 123
2410
+ 48 124
2411
+ 48 128
2412
+ 48 164
2413
+ 48 175
2414
+ 49 50
2415
+ 49 53
2416
+ 49 54
2417
+ 49 55
2418
+ 49 56
2419
+ 49 57
2420
+ 49 60
2421
+ 49 62
2422
+ 49 64
2423
+ 49 95
2424
+ 49 97
2425
+ 49 98
2426
+ 49 99
2427
+ 49 100
2428
+ 49 102
2429
+ 49 104
2430
+ 49 105
2431
+ 49 106
2432
+ 49 107
2433
+ 49 108
2434
+ 49 110
2435
+ 49 112
2436
+ 49 114
2437
+ 49 115
2438
+ 49 117
2439
+ 49 119
2440
+ 49 123
2441
+ 49 128
2442
+ 49 164
2443
+ 49 175
2444
+ 49 202
2445
+ 50 54
2446
+ 50 56
2447
+ 50 64
2448
+ 50 97
2449
+ 50 98
2450
+ 50 100
2451
+ 50 102
2452
+ 50 106
2453
+ 50 107
2454
+ 50 108
2455
+ 50 115
2456
+ 50 117
2457
+ 50 119
2458
+ 50 121
2459
+ 50 122
2460
+ 50 128
2461
+ 50 197
2462
+ 51 52
2463
+ 51 54
2464
+ 51 56
2465
+ 51 64
2466
+ 51 97
2467
+ 51 98
2468
+ 51 115
2469
+ 51 126
2470
+ 51 128
2471
+ 51 162
2472
+ 51 165
2473
+ 52 53
2474
+ 52 106
2475
+ 53 54
2476
+ 53 55
2477
+ 53 56
2478
+ 53 57
2479
+ 53 60
2480
+ 53 64
2481
+ 53 68
2482
+ 53 87
2483
+ 53 95
2484
+ 53 97
2485
+ 53 99
2486
+ 53 104
2487
+ 53 105
2488
+ 53 106
2489
+ 53 107
2490
+ 53 108
2491
+ 53 110
2492
+ 53 117
2493
+ 53 119
2494
+ 53 120
2495
+ 53 123
2496
+ 53 128
2497
+ 53 137
2498
+ 53 154
2499
+ 53 175
2500
+ 53 197
2501
+ 53 202
2502
+ 54 55
2503
+ 54 57
2504
+ 54 64
2505
+ 54 99
2506
+ 54 100
2507
+ 54 102
2508
+ 54 106
2509
+ 54 107
2510
+ 54 108
2511
+ 54 109
2512
+ 54 110
2513
+ 54 117
2514
+ 54 119
2515
+ 54 120
2516
+ 54 123
2517
+ 54 128
2518
+ 54 175
2519
+ 54 202
2520
+ 55 56
2521
+ 55 60
2522
+ 55 62
2523
+ 55 64
2524
+ 55 66
2525
+ 55 68
2526
+ 55 95
2527
+ 55 96
2528
+ 55 97
2529
+ 55 99
2530
+ 55 104
2531
+ 55 106
2532
+ 55 107
2533
+ 55 108
2534
+ 55 110
2535
+ 55 112
2536
+ 55 115
2537
+ 55 117
2538
+ 55 118
2539
+ 55 120
2540
+ 55 123
2541
+ 55 124
2542
+ 55 128
2543
+ 55 131
2544
+ 55 137
2545
+ 55 154
2546
+ 55 164
2547
+ 55 165
2548
+ 55 175
2549
+ 55 214
2550
+ 56 57
2551
+ 56 60
2552
+ 56 62
2553
+ 56 64
2554
+ 56 66
2555
+ 56 68
2556
+ 56 97
2557
+ 56 98
2558
+ 56 99
2559
+ 56 100
2560
+ 56 105
2561
+ 56 106
2562
+ 56 107
2563
+ 56 108
2564
+ 56 110
2565
+ 56 117
2566
+ 56 119
2567
+ 56 120
2568
+ 56 123
2569
+ 56 137
2570
+ 56 175
2571
+ 56 202
2572
+ 57 60
2573
+ 57 64
2574
+ 57 95
2575
+ 57 96
2576
+ 57 97
2577
+ 57 99
2578
+ 57 104
2579
+ 57 106
2580
+ 57 107
2581
+ 57 110
2582
+ 57 112
2583
+ 57 117
2584
+ 57 118
2585
+ 57 120
2586
+ 57 123
2587
+ 57 128
2588
+ 57 131
2589
+ 57 137
2590
+ 57 154
2591
+ 57 175
2592
+ 59 62
2593
+ 59 67
2594
+ 59 68
2595
+ 59 82
2596
+ 59 83
2597
+ 59 87
2598
+ 59 88
2599
+ 59 102
2600
+ 59 125
2601
+ 59 129
2602
+ 59 158
2603
+ 59 159
2604
+ 59 160
2605
+ 59 161
2606
+ 59 162
2607
+ 59 163
2608
+ 60 61
2609
+ 60 64
2610
+ 60 66
2611
+ 60 97
2612
+ 60 99
2613
+ 60 104
2614
+ 60 106
2615
+ 60 107
2616
+ 60 110
2617
+ 60 115
2618
+ 60 117
2619
+ 60 120
2620
+ 60 123
2621
+ 60 128
2622
+ 60 154
2623
+ 60 155
2624
+ 60 156
2625
+ 60 157
2626
+ 60 175
2627
+ 60 177
2628
+ 60 196
2629
+ 61 62
2630
+ 61 64
2631
+ 61 66
2632
+ 61 155
2633
+ 61 156
2634
+ 61 157
2635
+ 61 196
2636
+ 62 63
2637
+ 62 65
2638
+ 62 69
2639
+ 62 97
2640
+ 62 101
2641
+ 62 108
2642
+ 62 110
2643
+ 63 66
2644
+ 63 67
2645
+ 63 68
2646
+ 63 82
2647
+ 63 83
2648
+ 63 84
2649
+ 63 87
2650
+ 63 88
2651
+ 63 97
2652
+ 63 104
2653
+ 63 111
2654
+ 63 122
2655
+ 63 125
2656
+ 63 129
2657
+ 63 145
2658
+ 63 158
2659
+ 63 160
2660
+ 63 161
2661
+ 63 162
2662
+ 63 163
2663
+ 64 65
2664
+ 64 69
2665
+ 64 80
2666
+ 64 97
2667
+ 64 98
2668
+ 64 101
2669
+ 64 104
2670
+ 64 106
2671
+ 64 107
2672
+ 64 108
2673
+ 64 110
2674
+ 64 115
2675
+ 64 116
2676
+ 64 117
2677
+ 64 119
2678
+ 64 120
2679
+ 64 123
2680
+ 64 128
2681
+ 64 164
2682
+ 64 175
2683
+ 64 202
2684
+ 65 66
2685
+ 65 67
2686
+ 65 80
2687
+ 65 84
2688
+ 65 87
2689
+ 65 88
2690
+ 65 97
2691
+ 65 100
2692
+ 65 104
2693
+ 65 120
2694
+ 65 122
2695
+ 65 125
2696
+ 65 129
2697
+ 65 160
2698
+ 65 161
2699
+ 65 162
2700
+ 66 69
2701
+ 66 97
2702
+ 66 104
2703
+ 66 107
2704
+ 66 108
2705
+ 66 120
2706
+ 66 128
2707
+ 66 155
2708
+ 66 156
2709
+ 66 157
2710
+ 66 196
2711
+ 67 68
2712
+ 67 69
2713
+ 67 84
2714
+ 67 85
2715
+ 67 99
2716
+ 67 104
2717
+ 67 115
2718
+ 67 131
2719
+ 67 161
2720
+ 67 172
2721
+ 67 195
2722
+ 68 82
2723
+ 68 83
2724
+ 68 85
2725
+ 68 97
2726
+ 68 102
2727
+ 68 105
2728
+ 68 107
2729
+ 68 128
2730
+ 68 129
2731
+ 68 130
2732
+ 68 143
2733
+ 68 163
2734
+ 69 80
2735
+ 69 97
2736
+ 69 100
2737
+ 69 104
2738
+ 69 111
2739
+ 69 114
2740
+ 69 122
2741
+ 69 125
2742
+ 69 145
2743
+ 69 160
2744
+ 69 161
2745
+ 69 162
2746
+ 70 71
2747
+ 70 72
2748
+ 70 73
2749
+ 70 74
2750
+ 70 76
2751
+ 70 77
2752
+ 70 79
2753
+ 70 90
2754
+ 70 91
2755
+ 70 133
2756
+ 70 138
2757
+ 70 140
2758
+ 70 142
2759
+ 70 146
2760
+ 70 147
2761
+ 70 148
2762
+ 70 150
2763
+ 70 153
2764
+ 70 176
2765
+ 71 73
2766
+ 71 74
2767
+ 71 75
2768
+ 71 76
2769
+ 71 77
2770
+ 71 90
2771
+ 71 91
2772
+ 71 92
2773
+ 71 93
2774
+ 71 94
2775
+ 71 120
2776
+ 71 132
2777
+ 71 133
2778
+ 71 138
2779
+ 71 140
2780
+ 71 142
2781
+ 71 146
2782
+ 71 147
2783
+ 71 148
2784
+ 71 149
2785
+ 71 150
2786
+ 71 151
2787
+ 71 152
2788
+ 71 153
2789
+ 71 176
2790
+ 72 73
2791
+ 72 74
2792
+ 72 92
2793
+ 72 93
2794
+ 72 94
2795
+ 72 133
2796
+ 72 138
2797
+ 72 140
2798
+ 72 142
2799
+ 72 147
2800
+ 72 148
2801
+ 72 151
2802
+ 72 152
2803
+ 72 153
2804
+ 72 176
2805
+ 73 77
2806
+ 73 79
2807
+ 73 132
2808
+ 73 134
2809
+ 73 140
2810
+ 74 77
2811
+ 74 79
2812
+ 74 132
2813
+ 74 134
2814
+ 74 140
2815
+ 75 133
2816
+ 75 134
2817
+ 75 140
2818
+ 75 147
2819
+ 76 77
2820
+ 76 93
2821
+ 76 94
2822
+ 76 132
2823
+ 76 133
2824
+ 76 134
2825
+ 76 140
2826
+ 76 147
2827
+ 76 150
2828
+ 76 152
2829
+ 76 153
2830
+ 76 176
2831
+ 77 132
2832
+ 77 133
2833
+ 77 134
2834
+ 77 138
2835
+ 77 140
2836
+ 77 142
2837
+ 77 146
2838
+ 77 147
2839
+ 77 148
2840
+ 77 150
2841
+ 77 151
2842
+ 77 153
2843
+ 78 91
2844
+ 78 93
2845
+ 78 94
2846
+ 78 132
2847
+ 78 133
2848
+ 78 146
2849
+ 78 147
2850
+ 78 148
2851
+ 78 149
2852
+ 79 92
2853
+ 79 93
2854
+ 79 132
2855
+ 79 133
2856
+ 79 138
2857
+ 79 140
2858
+ 79 142
2859
+ 79 147
2860
+ 79 148
2861
+ 79 149
2862
+ 79 150
2863
+ 79 151
2864
+ 79 153
2865
+ 79 176
2866
+ 80 92
2867
+ 80 97
2868
+ 80 98
2869
+ 80 100
2870
+ 80 103
2871
+ 80 105
2872
+ 80 107
2873
+ 80 108
2874
+ 80 110
2875
+ 80 115
2876
+ 80 117
2877
+ 80 120
2878
+ 80 122
2879
+ 80 124
2880
+ 80 128
2881
+ 80 151
2882
+ 80 162
2883
+ 80 176
2884
+ 81 83
2885
+ 81 84
2886
+ 81 88
2887
+ 81 183
2888
+ 81 184
2889
+ 81 185
2890
+ 82 83
2891
+ 82 87
2892
+ 82 88
2893
+ 82 89
2894
+ 82 102
2895
+ 82 143
2896
+ 82 183
2897
+ 83 85
2898
+ 83 86
2899
+ 83 88
2900
+ 83 89
2901
+ 83 102
2902
+ 83 143
2903
+ 84 86
2904
+ 84 87
2905
+ 84 88
2906
+ 84 211
2907
+ 85 88
2908
+ 85 129
2909
+ 85 158
2910
+ 85 159
2911
+ 85 161
2912
+ 85 162
2913
+ 86 88
2914
+ 87 88
2915
+ 87 102
2916
+ 87 107
2917
+ 87 118
2918
+ 87 202
2919
+ 88 183
2920
+ 88 185
2921
+ 88 215
2922
+ 89 184
2923
+ 90 93
2924
+ 90 140
2925
+ 90 147
2926
+ 90 149
2927
+ 90 152
2928
+ 91 132
2929
+ 92 120
2930
+ 92 132
2931
+ 92 134
2932
+ 93 132
2933
+ 93 134
2934
+ 94 110
2935
+ 95 106
2936
+ 95 108
2937
+ 95 112
2938
+ 95 127
2939
+ 95 128
2940
+ 96 128
2941
+ 97 98
2942
+ 97 99
2943
+ 97 100
2944
+ 97 102
2945
+ 97 104
2946
+ 97 105
2947
+ 97 106
2948
+ 97 107
2949
+ 97 108
2950
+ 97 110
2951
+ 97 111
2952
+ 97 112
2953
+ 97 114
2954
+ 97 115
2955
+ 97 117
2956
+ 97 122
2957
+ 97 124
2958
+ 97 125
2959
+ 97 128
2960
+ 97 137
2961
+ 97 145
2962
+ 97 177
2963
+ 98 104
2964
+ 98 106
2965
+ 98 107
2966
+ 98 108
2967
+ 98 115
2968
+ 98 116
2969
+ 98 118
2970
+ 98 128
2971
+ 98 204
2972
+ 99 104
2973
+ 99 106
2974
+ 99 107
2975
+ 99 108
2976
+ 99 110
2977
+ 99 117
2978
+ 99 120
2979
+ 99 123
2980
+ 99 131
2981
+ 99 154
2982
+ 99 175
2983
+ 100 108
2984
+ 100 109
2985
+ 100 111
2986
+ 100 112
2987
+ 100 113
2988
+ 100 114
2989
+ 100 122
2990
+ 100 128
2991
+ 100 144
2992
+ 100 145
2993
+ 101 118
2994
+ 101 126
2995
+ 101 165
2996
+ 101 197
2997
+ 102 105
2998
+ 102 107
2999
+ 102 108
3000
+ 102 112
3001
+ 102 117
3002
+ 102 143
3003
+ 102 183
3004
+ 103 111
3005
+ 103 112
3006
+ 103 115
3007
+ 103 118
3008
+ 103 121
3009
+ 103 122
3010
+ 103 126
3011
+ 103 128
3012
+ 103 129
3013
+ 103 131
3014
+ 103 165
3015
+ 103 197
3016
+ 104 106
3017
+ 104 107
3018
+ 104 110
3019
+ 104 115
3020
+ 104 117
3021
+ 104 120
3022
+ 104 123
3023
+ 104 124
3024
+ 104 125
3025
+ 104 137
3026
+ 104 154
3027
+ 104 164
3028
+ 104 177
3029
+ 105 107
3030
+ 105 108
3031
+ 105 111
3032
+ 105 112
3033
+ 105 114
3034
+ 105 117
3035
+ 105 119
3036
+ 105 128
3037
+ 106 107
3038
+ 106 108
3039
+ 106 110
3040
+ 106 119
3041
+ 106 120
3042
+ 106 123
3043
+ 106 124
3044
+ 106 128
3045
+ 106 154
3046
+ 106 175
3047
+ 106 204
3048
+ 107 108
3049
+ 107 109
3050
+ 107 110
3051
+ 107 112
3052
+ 107 115
3053
+ 107 117
3054
+ 107 119
3055
+ 107 120
3056
+ 107 123
3057
+ 107 124
3058
+ 107 128
3059
+ 107 137
3060
+ 107 164
3061
+ 107 175
3062
+ 107 202
3063
+ 107 204
3064
+ 108 112
3065
+ 108 114
3066
+ 108 115
3067
+ 108 119
3068
+ 108 120
3069
+ 108 122
3070
+ 108 128
3071
+ 109 114
3072
+ 109 128
3073
+ 110 111
3074
+ 110 117
3075
+ 110 119
3076
+ 110 120
3077
+ 110 123
3078
+ 110 124
3079
+ 110 128
3080
+ 111 115
3081
+ 111 117
3082
+ 111 120
3083
+ 111 122
3084
+ 111 124
3085
+ 111 128
3086
+ 111 162
3087
+ 111 176
3088
+ 112 114
3089
+ 112 118
3090
+ 112 122
3091
+ 112 126
3092
+ 112 128
3093
+ 112 129
3094
+ 113 114
3095
+ 113 128
3096
+ 114 117
3097
+ 114 122
3098
+ 114 126
3099
+ 114 128
3100
+ 115 116
3101
+ 115 117
3102
+ 115 118
3103
+ 115 120
3104
+ 115 124
3105
+ 115 125
3106
+ 115 128
3107
+ 115 162
3108
+ 115 177
3109
+ 116 165
3110
+ 117 118
3111
+ 117 120
3112
+ 117 123
3113
+ 117 127
3114
+ 117 128
3115
+ 117 164
3116
+ 118 126
3117
+ 118 127
3118
+ 118 128
3119
+ 118 131
3120
+ 119 123
3121
+ 119 128
3122
+ 119 175
3123
+ 120 122
3124
+ 120 123
3125
+ 120 128
3126
+ 120 137
3127
+ 120 139
3128
+ 120 150
3129
+ 120 154
3130
+ 120 164
3131
+ 120 175
3132
+ 120 176
3133
+ 121 122
3134
+ 121 126
3135
+ 121 128
3136
+ 122 126
3137
+ 122 128
3138
+ 122 162
3139
+ 123 137
3140
+ 123 154
3141
+ 123 175
3142
+ 124 137
3143
+ 126 129
3144
+ 126 131
3145
+ 126 162
3146
+ 126 167
3147
+ 126 168
3148
+ 126 170
3149
+ 126 171
3150
+ 126 172
3151
+ 127 165
3152
+ 127 197
3153
+ 128 202
3154
+ 129 168
3155
+ 129 170
3156
+ 130 145
3157
+ 130 159
3158
+ 130 160
3159
+ 130 162
3160
+ 132 133
3161
+ 132 138
3162
+ 132 140
3163
+ 132 142
3164
+ 132 146
3165
+ 132 147
3166
+ 132 148
3167
+ 132 149
3168
+ 132 150
3169
+ 132 151
3170
+ 132 152
3171
+ 132 153
3172
+ 133 134
3173
+ 133 140
3174
+ 133 153
3175
+ 134 138
3176
+ 134 140
3177
+ 134 142
3178
+ 134 148
3179
+ 134 150
3180
+ 134 153
3181
+ 134 176
3182
+ 138 140
3183
+ 138 147
3184
+ 138 150
3185
+ 141 181
3186
+ 141 206
3187
+ 142 153
3188
+ 145 160
3189
+ 145 210
3190
+ 146 153
3191
+ 154 175
3192
+ 155 156
3193
+ 155 157
3194
+ 156 157
3195
+ 158 166
3196
+ 158 167
3197
+ 158 168
3198
+ 158 169
3199
+ 158 170
3200
+ 158 171
3201
+ 158 172
3202
+ 163 208
3203
+ 164 175
3204
+ 166 172
3205
+ 167 168
3206
+ 168 170
3207
+ 168 172
3208
+ 173 174
3209
+ 174 179
3210
+ 174 182
3211
+ 174 203
3212
+ 174 213
3213
+ 178 179
3214
+ 178 180
3215
+ 178 181
3216
+ 179 180
3217
+ 179 203
3218
+ 179 205
3219
+ 179 206
3220
+ 180 181
3221
+ 180 203
3222
+ 180 213
3223
+ 181 207
3224
+ 181 213
3225
+ 182 205
3226
+ 182 206
3227
+ 183 208
3228
+ 186 187
3229
+ 186 191
3230
+ 186 193
3231
+ 186 194
3232
+ 186 199
3233
+ 187 190
3234
+ 187 191
3235
+ 187 193
3236
+ 187 201
3237
+ 188 189
3238
+ 188 193
3239
+ 188 194
3240
+ 188 199
3241
+ 188 200
3242
+ 188 212
3243
+ 189 190
3244
+ 189 193
3245
+ 189 201
3246
+ 190 193
3247
+ 190 194
3248
+ 190 199
3249
+ 190 200
3250
+ 191 194
3251
+ 191 198
3252
+ 191 199
3253
+ 191 200
3254
+ 192 194
3255
+ 193 194
3256
+ 193 198
3257
+ 193 199
3258
+ 193 212
3259
+ 193 216
3260
+ 194 200
3261
+ 194 212
3262
+ 198 200
3263
+ 198 201
3264
+ 199 200
3265
+ 200 201
3266
+ 201 216
3267
+ 205 206
3268
+ 205 207
3269
+ 208 209
analysis/rdkit_functions.py ADDED
@@ -0,0 +1,334 @@
1
+ import numpy as np
2
+ import torch
3
+ import re
4
+ import wandb
5
+ try:
6
+ from rdkit import Chem
7
+ print("Found rdkit, all good")
8
+ except ModuleNotFoundError as e:
9
+ use_rdkit = False
10
+ from warnings import warn
11
+ warn("Didn't find rdkit, this will fail")
12
+ assert use_rdkit, "Didn't find rdkit"
13
+
14
+
15
+ allowed_bonds = {'H': 1, 'C': 4, 'N': 3, 'O': 2, 'F': 1, 'B': 3, 'Al': 3, 'Si': 4, 'P': [3, 5],
16
+ 'S': 4, 'Cl': 1, 'As': 3, 'Br': 1, 'I': 1, 'Hg': [1, 2], 'Bi': [3, 5], 'Se': [2, 4, 6]}
17
+ bond_dict = [None, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE,
18
+ Chem.rdchem.BondType.AROMATIC]
19
+ ATOM_VALENCY = {6: 4, 7: 3, 8: 2, 9: 1, 15: 3, 16: 2, 17: 1, 35: 1, 53: 1}
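+ # bond_dict maps the integer edge types used throughout this file (0 = none, 1 = single, 2 = double,
+ # 3 = triple, 4 = aromatic) to RDKit bond types; allowed_bonds is consumed by check_stability below,
+ # and ATOM_VALENCY (keyed by atomic number) is used when adding formal charges to over-valent N/O/S atoms.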
20
+
21
+
22
+ class BasicMolecularMetrics(object):
23
+ def __init__(self, dataset_info, train_smiles=None):
24
+ self.atom_decoder = dataset_info.atom_decoder
25
+ self.dataset_info = dataset_info
26
+
27
+ # Retrieve dataset smiles only for qm9 currently.
28
+ self.dataset_smiles_list = train_smiles
29
+
30
+ def compute_validity(self, generated):
31
+ """ generated: list of couples (positions, atom_types)"""
32
+ valid = []
33
+ num_components = []
34
+ all_smiles = []
35
+ for graph in generated:
36
+ atom_types, edge_types = graph
37
+ mol = build_molecule(atom_types, edge_types, self.dataset_info.atom_decoder)
38
+ smiles = mol2smiles(mol)
39
+ try:
40
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
41
+ num_components.append(len(mol_frags))
42
+ except:
43
+ pass
44
+ if smiles is not None:
45
+ try:
46
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
47
+ largest_mol = max(mol_frags, default=mol, key=lambda m: m.GetNumAtoms())
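+ # Only the largest connected fragment is kept, so a disconnected but otherwise valid sample
+ # still contributes a valid SMILES.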
48
+ smiles = mol2smiles(largest_mol)
49
+ valid.append(smiles)
50
+ all_smiles.append(smiles)
51
+ except Chem.rdchem.AtomValenceException:
52
+ print("Valence error in GetMolFrags")
53
+ all_smiles.append(None)
54
+ except Chem.rdchem.KekulizeException:
55
+ print("Can't kekulize molecule")
56
+ all_smiles.append(None)
57
+ else:
58
+ all_smiles.append(None)
59
+
60
+ return valid, len(valid) / len(generated), np.array(num_components), all_smiles
61
+
62
+ def compute_uniqueness(self, valid):
63
+ """ valid: list of SMILES strings."""
64
+ return list(set(valid)), len(set(valid)) / len(valid)
65
+
66
+ def compute_novelty(self, unique):
67
+ num_novel = 0
68
+ novel = []
69
+ if self.dataset_smiles_list is None:
70
+ print("Dataset smiles is None, novelty computation skipped")
71
+ return 1, 1
72
+ for smiles in unique:
73
+ if smiles not in self.dataset_smiles_list:
74
+ novel.append(smiles)
75
+ num_novel += 1
76
+ return novel, num_novel / len(unique)
77
+
78
+ def compute_relaxed_validity(self, generated):
79
+ valid = []
80
+ for graph in generated:
81
+ atom_types, edge_types = graph
82
+ mol = build_molecule_with_partial_charges(atom_types, edge_types, self.dataset_info.atom_decoder)
83
+ smiles = mol2smiles(mol)
84
+ if smiles is not None:
85
+ try:
86
+ mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
87
+ largest_mol = max(mol_frags, default=mol, key=lambda m: m.GetNumAtoms())
88
+ smiles = mol2smiles(largest_mol)
89
+ valid.append(smiles)
90
+ except Chem.rdchem.AtomValenceException:
91
+ print("Valence error in GetMolFrags")
92
+ except Chem.rdchem.KekulizeException:
93
+ print("Can't kekulize molecule")
94
+ return valid, len(valid) / len(generated)
95
+
96
+ def evaluate(self, generated):
97
+ """ generated: list of pairs (positions: n x 3, atom_types: n [int])
98
+ the positions and atom types should already be masked. """
99
+ valid, validity, num_components, all_smiles = self.compute_validity(generated)
100
+ nc_mu = num_components.mean() if len(num_components) > 0 else 0
101
+ nc_min = num_components.min() if len(num_components) > 0 else 0
102
+ nc_max = num_components.max() if len(num_components) > 0 else 0
103
+ print(f"Validity over {len(generated)} molecules: {validity * 100 :.2f}%")
104
+ print(f"Number of connected components of {len(generated)} molecules: min:{nc_min:.2f} mean:{nc_mu:.2f} max:{nc_max:.2f}")
105
+
106
+ relaxed_valid, relaxed_validity = self.compute_relaxed_validity(generated)
107
+ print(f"Relaxed validity over {len(generated)} molecules: {relaxed_validity * 100 :.2f}%")
108
+ if relaxed_validity > 0:
109
+ unique, uniqueness = self.compute_uniqueness(relaxed_valid)
110
+ print(f"Uniqueness over {len(relaxed_valid)} valid molecules: {uniqueness * 100 :.2f}%")
111
+
112
+ if self.dataset_smiles_list is not None:
113
+ _, novelty = self.compute_novelty(unique)
114
+ print(f"Novelty over {len(unique)} unique valid molecules: {novelty * 100 :.2f}%")
115
+ else:
116
+ novelty = -1.0
117
+ else:
118
+ novelty = -1.0
119
+ uniqueness = 0.0
120
+ unique = []
121
+ return ([validity, relaxed_validity, uniqueness, novelty], unique,
122
+ dict(nc_min=nc_min, nc_max=nc_max, nc_mu=nc_mu), all_smiles)
123
+
124
+
125
+ def mol2smiles(mol):
126
+ try:
127
+ Chem.SanitizeMol(mol)
128
+ except ValueError:
129
+ return None
130
+ return Chem.MolToSmiles(mol)
131
+
132
+
133
+ def build_molecule(atom_types, edge_types, atom_decoder, verbose=False):
134
+ if verbose:
135
+ print("building new molecule")
136
+
137
+ mol = Chem.RWMol()
138
+ for atom in atom_types:
139
+ a = Chem.Atom(atom_decoder[atom.item()])
140
+ mol.AddAtom(a)
141
+ if verbose:
142
+ print("Atom added: ", atom.item(), atom_decoder[atom.item()])
143
+
144
+ edge_types = torch.triu(edge_types)
145
+ all_bonds = torch.nonzero(edge_types)
146
+ for i, bond in enumerate(all_bonds):
147
+ if bond[0].item() != bond[1].item():
148
+ mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()])
149
+ if verbose:
150
+ print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(),
151
+ bond_dict[edge_types[bond[0], bond[1]].item()] )
152
+ return mol
153
+
154
+
155
+ def build_molecule_with_partial_charges(atom_types, edge_types, atom_decoder, verbose=False):
156
+ if verbose:
157
+ print("\nbuilding new molecule")
158
+
159
+ mol = Chem.RWMol()
160
+ for atom in atom_types:
161
+ a = Chem.Atom(atom_decoder[atom.item()])
162
+ mol.AddAtom(a)
163
+ if verbose:
164
+ print("Atom added: ", atom.item(), atom_decoder[atom.item()])
165
+ edge_types = torch.triu(edge_types)
166
+ all_bonds = torch.nonzero(edge_types)
167
+
168
+ for i, bond in enumerate(all_bonds):
169
+ if bond[0].item() != bond[1].item():
170
+ mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()])
171
+ if verbose:
172
+ print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(),
173
+ bond_dict[edge_types[bond[0], bond[1]].item()])
174
+ # add formal charge to atom: e.g. [O+], [N+], [S+]
175
+ # not support [O-], [N-], [S-], [NH+] etc.
176
+ flag, atomid_valence = check_valency(mol)
177
+ if verbose:
178
+ print("flag, valence", flag, atomid_valence)
179
+ if flag:
180
+ continue
181
+ else:
182
+ assert len(atomid_valence) == 2
183
+ idx = atomid_valence[0]
184
+ v = atomid_valence[1]
185
+ an = mol.GetAtomWithIdx(idx).GetAtomicNum()
186
+ if verbose:
187
+ print("atomic num of atom with a large valence", an)
188
+ if an in (7, 8, 16) and (v - ATOM_VALENCY[an]) == 1:
189
+ mol.GetAtomWithIdx(idx).SetFormalCharge(1)
190
+ # print("Formal charge added")
191
+ return mol
192
+
193
+
194
+ # Functions from GDSS
195
+ def check_valency(mol):
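+ # SanitizeMol either succeeds (all valences are legal) or raises a ValueError whose message contains
+ # the index of the offending atom and its valence; the regex below recovers those two integers.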
196
+ try:
197
+ Chem.SanitizeMol(mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_PROPERTIES)
198
+ return True, None
199
+ except ValueError as e:
200
+ e = str(e)
201
+ p = e.find('#')
202
+ e_sub = e[p:]
203
+ atomid_valence = list(map(int, re.findall(r'\d+', e_sub)))
204
+ return False, atomid_valence
205
+
206
+
207
+ def correct_mol(m):
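+ # Repeatedly take the atom flagged by check_valency and downgrade its highest-order non-aromatic bond
+ # by one (e.g. triple -> double, single -> removed); if all of its bonds are aromatic (RDKit bond
+ # type 12) the molecule cannot be repaired this way and (None, no_correct) is returned.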
208
+ # xsm = Chem.MolToSmiles(x, isomericSmiles=True)
209
+ mol = m
210
+
211
+ #####
212
+ no_correct = False
213
+ flag, _ = check_valency(mol)
214
+ if flag:
215
+ no_correct = True
216
+
217
+ while True:
218
+ flag, atomid_valence = check_valency(mol)
219
+ if flag:
220
+ break
221
+ else:
222
+ assert len(atomid_valence) == 2
223
+ idx = atomid_valence[0]
224
+ v = atomid_valence[1]
225
+ queue = []
226
+ check_idx = 0
227
+ for b in mol.GetAtomWithIdx(idx).GetBonds():
228
+ type = int(b.GetBondType())
229
+ queue.append((b.GetIdx(), type, b.GetBeginAtomIdx(), b.GetEndAtomIdx()))
230
+ if type == 12:
231
+ check_idx += 1
232
+ queue.sort(key=lambda tup: tup[1], reverse=True)
233
+
234
+ if queue[-1][1] == 12:
235
+ return None, no_correct
236
+ elif len(queue) > 0:
237
+ start = queue[check_idx][2]
238
+ end = queue[check_idx][3]
239
+ t = queue[check_idx][1] - 1
240
+ mol.RemoveBond(start, end)
241
+ if t >= 1:
242
+ mol.AddBond(start, end, bond_dict[t])
243
+ return mol, no_correct
244
+
245
+
246
+ def valid_mol_can_with_seg(m, largest_connected_comp=True):
247
+ if m is None:
248
+ return None
249
+ sm = Chem.MolToSmiles(m, isomericSmiles=True)
250
+ if largest_connected_comp and '.' in sm:
251
+ vsm = [(s, len(s)) for s in sm.split('.')] # 'C.CC.CCc1ccc(N)cc1CCC=O'.split('.')
252
+ vsm.sort(key=lambda tup: tup[1], reverse=True)
253
+ mol = Chem.MolFromSmiles(vsm[0][0])
254
+ else:
255
+ mol = Chem.MolFromSmiles(sm)
256
+ return mol
257
+
258
+
259
+ if __name__ == '__main__':
260
+ smiles_mol = 'C1CCC1'
261
+ print("Smiles mol %s" % smiles_mol)
262
+ chem_mol = Chem.MolFromSmiles(smiles_mol)
263
+ block_mol = Chem.MolToMolBlock(chem_mol)
264
+ print("Block mol:")
265
+ print(block_mol)
266
+
267
+ use_rdkit = True
268
+
269
+
270
+ def check_stability(atom_types, edge_types, dataset_info, debug=False,atom_decoder=None):
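+ # An atom counts as stable when its total bond order matches one of the valences listed in
+ # allowed_bonds; the molecule is stable only if every atom is stable.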
271
+ if atom_decoder is None:
272
+ atom_decoder = dataset_info.atom_decoder
273
+
274
+ n_bonds = np.zeros(len(atom_types), dtype='int')
275
+
276
+ for i in range(len(atom_types)):
277
+ for j in range(i + 1, len(atom_types)):
278
+ n_bonds[i] += abs((edge_types[i, j] + edge_types[j, i])/2)
279
+ n_bonds[j] += abs((edge_types[i, j] + edge_types[j, i])/2)
280
+ n_stable_bonds = 0
281
+ for atom_type, atom_n_bond in zip(atom_types, n_bonds):
282
+ possible_bonds = allowed_bonds[atom_decoder[atom_type]]
283
+ if type(possible_bonds) == int:
284
+ is_stable = possible_bonds == atom_n_bond
285
+ else:
286
+ is_stable = atom_n_bond in possible_bonds
287
+ if not is_stable and debug:
288
+ print("Invalid bonds for molecule %s with %d bonds" % (atom_decoder[atom_type], atom_n_bond))
289
+ n_stable_bonds += int(is_stable)
290
+
291
+ molecule_stable = n_stable_bonds == len(atom_types)
292
+ return molecule_stable, n_stable_bonds, len(atom_types)
293
+
294
+
295
+ def compute_molecular_metrics(molecule_list, train_smiles, dataset_info):
296
+ """ molecule_list: (dict) """
297
+
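+ # Each entry of molecule_list is an (atom_types, edge_types) pair of tensors, the same format
+ # consumed by BasicMolecularMetrics.evaluate below; train_smiles is only needed for novelty.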
298
+ if not dataset_info.remove_h:
299
+ print(f'Analyzing molecule stability...')
300
+
301
+ molecule_stable = 0
302
+ nr_stable_bonds = 0
303
+ n_atoms = 0
304
+ n_molecules = len(molecule_list)
305
+
306
+ for i, mol in enumerate(molecule_list):
307
+ atom_types, edge_types = mol
308
+
309
+ validity_results = check_stability(atom_types, edge_types, dataset_info)
310
+
311
+ molecule_stable += int(validity_results[0])
312
+ nr_stable_bonds += int(validity_results[1])
313
+ n_atoms += int(validity_results[2])
314
+
315
+ # Validity
316
+ fraction_mol_stable = molecule_stable / float(n_molecules)
317
+ fraction_atm_stable = nr_stable_bonds / float(n_atoms)
318
+ validity_dict = {'mol_stable': fraction_mol_stable, 'atm_stable': fraction_atm_stable}
319
+ if wandb.run:
320
+ wandb.log(validity_dict)
321
+ else:
322
+ validity_dict = {'mol_stable': -1, 'atm_stable': -1}
323
+
324
+ metrics = BasicMolecularMetrics(dataset_info, train_smiles)
325
+ rdkit_metrics = metrics.evaluate(molecule_list)
326
+ all_smiles = rdkit_metrics[-1]
327
+ if wandb.run:
328
+ nc = rdkit_metrics[-2]
329
+ dic = {'Validity': rdkit_metrics[0][0], 'Relaxed Validity': rdkit_metrics[0][1],
330
+ 'Uniqueness': rdkit_metrics[0][2], 'Novelty': rdkit_metrics[0][3],
331
+ 'nc_max': nc['nc_max'], 'nc_mu': nc['nc_mu']}
332
+ wandb.log(dic)
333
+
334
+ return validity_dict, rdkit_metrics, all_smiles
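+ # Illustrative usage only (the molecule list, training SMILES and dataset_info objects are assumed
+ # to be provided by the surrounding training/sampling code):
+ #   validity_dict, rdkit_metrics, all_smiles = compute_molecular_metrics(molecules, train_smiles, dataset_info)
+ #   validity, relaxed_validity, uniqueness, novelty = rdkit_metrics[0]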
analysis/spectre_utils.py ADDED
@@ -0,0 +1,928 @@
1
+ ###############################################################################
2
+ #
3
+ # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation
4
+ #
5
+ ###############################################################################
6
+ # import graph_tool.all as gt
7
+ ## Navigate to the ./analysis/orca directory and compile orca.cpp:
8
+ # g++ -O2 -std=c++11 -o orca orca.cpp
9
+ import os
10
+ import copy
11
+ import torch
12
+ import torch.nn as nn
13
+ import numpy as np
14
+ import networkx as nx
15
+ import subprocess as sp
16
+ import concurrent.futures
17
+
18
+ import pygsp as pg
19
+ import secrets
20
+ from string import ascii_uppercase, digits
21
+ from datetime import datetime
22
+ from scipy.linalg import eigvalsh
23
+ from scipy.stats import chi2
24
+ from analysis.dist_helper import compute_mmd, gaussian_emd, gaussian, emd, gaussian_tv, disc
25
+ from torch_geometric.utils import to_networkx
26
+ import wandb
27
+ from collections import defaultdict
28
+
29
+
30
+ PRINT_TIME = False
31
+ __all__ = ['degree_stats', 'clustering_stats', 'orbit_stats_all', 'spectral_stats', 'eval_acc_lobster_graph']
32
+
33
+
34
+ def degree_worker(G):
35
+ return np.array(nx.degree_histogram(G))
36
+
37
+
38
+ def degree_stats(graph_ref_list, graph_pred_list, is_parallel=True, compute_emd=False):
39
+ ''' Compute the distance between the degree distributions of two unordered sets of graphs.
40
+ Args:
41
+ graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated
42
+ '''
43
+ sample_ref = []
44
+ sample_pred = []
45
+ # in case an empty graph is generated
46
+ graph_pred_list_remove_empty = [
47
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
48
+ ]
49
+
50
+ prev = datetime.now()
51
+ if is_parallel:
52
+ with concurrent.futures.ThreadPoolExecutor() as executor:
53
+ for deg_hist in executor.map(degree_worker, graph_ref_list):
54
+ sample_ref.append(deg_hist)
55
+ with concurrent.futures.ThreadPoolExecutor() as executor:
56
+ for deg_hist in executor.map(degree_worker, graph_pred_list_remove_empty):
57
+ sample_pred.append(deg_hist)
58
+ else:
59
+ for i in range(len(graph_ref_list)):
60
+ degree_temp = np.array(nx.degree_histogram(graph_ref_list[i]))
61
+ sample_ref.append(degree_temp)
62
+ for i in range(len(graph_pred_list_remove_empty)):
63
+ degree_temp = np.array(
64
+ nx.degree_histogram(graph_pred_list_remove_empty[i]))
65
+ sample_pred.append(degree_temp)
66
+
67
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
68
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
69
+ if compute_emd:
70
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
71
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
72
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
73
+ else:
74
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
75
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
76
+
77
+ elapsed = datetime.now() - prev
78
+ if PRINT_TIME:
79
+ print('Time computing degree mmd: ', elapsed)
80
+ return mmd_dist
81
+
82
+
83
+ ###############################################################################
84
+
85
+ def spectral_worker(G, n_eigvals=-1):
86
+ # eigs = nx.laplacian_spectrum(G)
87
+ try:
88
+ eigs = eigvalsh(nx.normalized_laplacian_matrix(G).todense())
89
+ except:
90
+ eigs = np.zeros(G.number_of_nodes())
91
+ if n_eigvals > 0:
92
+ eigs = eigs[1:n_eigvals + 1]
93
+ spectral_pmf, _ = np.histogram(eigs, bins=200, range=(-1e-5, 2), density=False)
94
+ spectral_pmf = spectral_pmf / spectral_pmf.sum()
95
+ return spectral_pmf
96
+
97
+
98
+ def get_spectral_pmf(eigs, max_eig):
99
+ spectral_pmf, _ = np.histogram(np.clip(eigs, 0, max_eig), bins=200, range=(-1e-5, max_eig), density=False)
100
+ spectral_pmf = spectral_pmf / spectral_pmf.sum()
101
+ return spectral_pmf
102
+
103
+
104
+ def eigval_stats(eig_ref_list, eig_pred_list, max_eig=20, is_parallel=True, compute_emd=False):
105
+ ''' Compute the distance between the eigenvalue distributions of two unordered sets of graphs.
106
+ Args:
107
+ eig_ref_list, eig_pred_list: two lists of eigenvalue arrays to be evaluated
108
+ '''
109
+ sample_ref = []
110
+ sample_pred = []
111
+
112
+ prev = datetime.now()
113
+ if is_parallel:
114
+ with concurrent.futures.ThreadPoolExecutor() as executor:
115
+ for spectral_density in executor.map(get_spectral_pmf, eig_ref_list,
116
+ [max_eig for i in range(len(eig_ref_list))]):
117
+ sample_ref.append(spectral_density)
118
+ with concurrent.futures.ThreadPoolExecutor() as executor:
119
+ for spectral_density in executor.map(get_spectral_pmf, eig_pred_list,
120
+ [max_eig for i in range(len(eig_ref_list))]):
121
+ sample_pred.append(spectral_density)
122
+ else:
123
+ for i in range(len(eig_ref_list)):
124
+ spectral_temp = get_spectral_pmf(eig_ref_list[i], max_eig)
125
+ sample_ref.append(spectral_temp)
126
+ for i in range(len(eig_pred_list)):
127
+ spectral_temp = get_spectral_pmf(eig_pred_list[i], max_eig)
128
+ sample_pred.append(spectral_temp)
129
+
130
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
131
+ if compute_emd:
132
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
133
+ else:
134
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
135
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
136
+
137
+ elapsed = datetime.now() - prev
138
+ if PRINT_TIME:
139
+ print('Time computing eig mmd: ', elapsed)
140
+ return mmd_dist
141
+
142
+
143
+ def eigh_worker(G):
144
+ L = nx.normalized_laplacian_matrix(G).todense()
145
+ try:
146
+ eigvals, eigvecs = np.linalg.eigh(L)
147
+ except:
148
+ eigvals = np.zeros(L[0, :].shape)
149
+ eigvecs = np.zeros(L.shape)
150
+ return (eigvals, eigvecs)
151
+
152
+
153
+ def compute_list_eigh(graph_list, is_parallel=False):
154
+ eigval_list = []
155
+ eigvec_list = []
156
+ if is_parallel:
157
+ with concurrent.futures.ThreadPoolExecutor() as executor:
158
+ for e_U in executor.map(eigh_worker, graph_list):
159
+ eigval_list.append(e_U[0])
160
+ eigvec_list.append(e_U[1])
161
+ else:
162
+ for i in range(len(graph_list)):
163
+ e_U = eigh_worker(graph_list[i])
164
+ eigval_list.append(e_U[0])
165
+ eigvec_list.append(e_U[1])
166
+ return eigval_list, eigvec_list
167
+
168
+
169
+ def get_spectral_filter_worker(eigvec, eigval, filters, bound=1.4):
170
+ ges = filters.evaluate(eigval)
171
+ linop = []
172
+ for ge in ges:
173
+ linop.append(eigvec @ np.diag(ge) @ eigvec.T)
174
+ linop = np.array(linop)
175
+ norm_filt = np.sum(linop ** 2, axis=2)
176
+ hist_range = [0, bound]
177
+ hist = np.array([np.histogram(x, range=hist_range, bins=100)[0] for x in norm_filt]) # NOTE: change number of bins
178
+ return hist.flatten()
179
+
180
+
181
+ def spectral_filter_stats(eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list, is_parallel=False,
182
+ compute_emd=False):
183
+ ''' Compute the distance between the spectral filter (graph wavelet) statistics of two sets of graphs.
184
+ Args:
185
+ eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list: eigendecompositions of the reference and predicted graphs
186
+ '''
187
+ prev = datetime.now()
188
+
189
+ class DMG(object):
190
+ """Dummy Normalized Graph"""
191
+ lmax = 2
192
+
193
+ n_filters = 12
194
+ filters = pg.filters.Abspline(DMG, n_filters)
195
+ bound = np.max(filters.evaluate(np.arange(0, 2, 0.01)))
196
+ sample_ref = []
197
+ sample_pred = []
198
+ if is_parallel:
199
+ with concurrent.futures.ThreadPoolExecutor() as executor:
200
+ for spectral_density in executor.map(get_spectral_filter_worker, eigvec_ref_list, eigval_ref_list,
201
+ [filters for i in range(len(eigval_ref_list))],
202
+ [bound for i in range(len(eigval_ref_list))]):
203
+ sample_ref.append(spectral_density)
204
+ with concurrent.futures.ThreadPoolExecutor() as executor:
205
+ for spectral_density in executor.map(get_spectral_filter_worker, eigvec_pred_list, eigval_pred_list,
206
+ [filters for i in range(len(eigval_ref_list))],
207
+ [bound for i in range(len(eigval_ref_list))]):
208
+ sample_pred.append(spectral_density)
209
+ else:
210
+ for i in range(len(eigval_ref_list)):
211
+ try:
212
+ spectral_temp = get_spectral_filter_worker(eigvec_ref_list[i], eigval_ref_list[i], filters, bound)
213
+ sample_ref.append(spectral_temp)
214
+ except:
215
+ pass
216
+ for i in range(len(eigval_pred_list)):
217
+ try:
218
+ spectral_temp = get_spectral_filter_worker(eigvec_pred_list[i], eigval_pred_list[i], filters, bound)
219
+ sample_pred.append(spectral_temp)
220
+ except:
221
+ pass
222
+
223
+ if compute_emd:
224
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
225
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
226
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
227
+ else:
228
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
229
+
230
+ elapsed = datetime.now() - prev
231
+ if PRINT_TIME:
232
+ print('Time computing spectral filter stats: ', elapsed)
233
+ return mmd_dist
234
+
235
+
236
+ def spectral_stats(graph_ref_list, graph_pred_list, is_parallel=True, n_eigvals=-1, compute_emd=False):
237
+ ''' Compute the distance between the Laplacian spectrum distributions of two unordered sets of graphs.
238
+ Args:
239
+ graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated
240
+ '''
241
+ sample_ref = []
242
+ sample_pred = []
243
+ # in case an empty graph is generated
244
+ graph_pred_list_remove_empty = [
245
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
246
+ ]
247
+
248
+ prev = datetime.now()
249
+ if is_parallel:
250
+ with concurrent.futures.ThreadPoolExecutor() as executor:
251
+ for spectral_density in executor.map(spectral_worker, graph_ref_list, [n_eigvals for i in graph_ref_list]):
252
+ sample_ref.append(spectral_density)
253
+ with concurrent.futures.ThreadPoolExecutor() as executor:
254
+ for spectral_density in executor.map(spectral_worker, graph_pred_list_remove_empty,
255
+ [n_eigvals for i in graph_ref_list]):
256
+ sample_pred.append(spectral_density)
257
+ else:
258
+ for i in range(len(graph_ref_list)):
259
+ spectral_temp = spectral_worker(graph_ref_list[i], n_eigvals)
260
+ sample_ref.append(spectral_temp)
261
+ for i in range(len(graph_pred_list_remove_empty)):
262
+ spectral_temp = spectral_worker(graph_pred_list_remove_empty[i], n_eigvals)
263
+ sample_pred.append(spectral_temp)
264
+
265
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
266
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
267
+ if compute_emd:
268
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
269
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
270
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
271
+ else:
272
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
273
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian)
274
+
275
+ elapsed = datetime.now() - prev
276
+ if PRINT_TIME:
277
+ print('Time computing spectral mmd: ', elapsed)
278
+ return mmd_dist
279
+
280
+
281
+ ###############################################################################
282
+
283
+ def clustering_worker(param):
284
+ G, bins = param
285
+ clustering_coeffs_list = list(nx.clustering(G).values())
286
+ hist, _ = np.histogram(
287
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
288
+ return hist
289
+
290
+
291
+ def clustering_stats(graph_ref_list,
292
+ graph_pred_list,
293
+ bins=100,
294
+ is_parallel=True, compute_emd=False):
295
+ sample_ref = []
296
+ sample_pred = []
297
+ graph_pred_list_remove_empty = [
298
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
299
+ ]
300
+
301
+ prev = datetime.now()
302
+ if is_parallel:
303
+ with concurrent.futures.ThreadPoolExecutor() as executor:
304
+ for clustering_hist in executor.map(clustering_worker,
305
+ [(G, bins) for G in graph_ref_list]):
306
+ sample_ref.append(clustering_hist)
307
+ with concurrent.futures.ThreadPoolExecutor() as executor:
308
+ for clustering_hist in executor.map(
309
+ clustering_worker, [(G, bins) for G in graph_pred_list_remove_empty]):
310
+ sample_pred.append(clustering_hist)
311
+
312
+ # check non-zero elements in hist
313
+ # total = 0
314
+ # for i in range(len(sample_pred)):
315
+ # nz = np.nonzero(sample_pred[i])[0].shape[0]
316
+ # total += nz
317
+ # print(total)
318
+ else:
319
+ for i in range(len(graph_ref_list)):
320
+ clustering_coeffs_list = list(nx.clustering(graph_ref_list[i]).values())
321
+ hist, _ = np.histogram(
322
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
323
+ sample_ref.append(hist)
324
+
325
+ for i in range(len(graph_pred_list_remove_empty)):
326
+ clustering_coeffs_list = list(
327
+ nx.clustering(graph_pred_list_remove_empty[i]).values())
328
+ hist, _ = np.histogram(
329
+ clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
330
+ sample_pred.append(hist)
331
+
332
+ if compute_emd:
333
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
334
+ # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd, sigma=1.0 / 10)
335
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd, sigma=1.0 / 10, distance_scaling=bins)
336
+ else:
337
+ mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv, sigma=1.0 / 10)
338
+
339
+ elapsed = datetime.now() - prev
340
+ if PRINT_TIME:
341
+ print('Time computing clustering mmd: ', elapsed)
342
+ return mmd_dist
343
+
344
+
345
+ # maps motif/orbit name string to its corresponding list of indices from orca output
346
+ motif_to_indices = {
347
+ '3path': [1, 2],
348
+ '4cycle': [8],
349
+ }
350
+ COUNT_START_STR = 'orbit counts:'
351
+
352
+
353
+ def edge_list_reindexed(G):
354
+ idx = 0
355
+ id2idx = dict()
356
+ for u in G.nodes():
357
+ id2idx[str(u)] = idx
358
+ idx += 1
359
+
360
+ edges = []
361
+ for (u, v) in G.edges():
362
+ edges.append((id2idx[str(u)], id2idx[str(v)]))
363
+ return edges
364
+
365
+
366
+ def orca(graph):
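+ # Write the graph as "<n_nodes> <n_edges>" followed by one edge per line to a temporary file, run the
+ # compiled orca binary ("orca node 4 <tmp_file> std") and parse the per-node orbit counts that follow
+ # the "orbit counts:" marker in its output.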
367
+ # tmp_fname = f'analysis/orca/tmp_{"".join(secrets.choice(ascii_uppercase + digits) for i in range(8))}.txt'
368
+ tmp_fname = f'orca/tmp_{"".join(secrets.choice(ascii_uppercase + digits) for i in range(8))}.txt'
369
+ tmp_fname = os.path.join(os.path.dirname(os.path.realpath(__file__)), tmp_fname)
370
+ # print(tmp_fname, flush=True)
371
+ f = open(tmp_fname, 'w')
372
+ f.write(
373
+ str(graph.number_of_nodes()) + ' ' + str(graph.number_of_edges()) + '\n')
374
+ for (u, v) in edge_list_reindexed(graph):
375
+ f.write(str(u) + ' ' + str(v) + '\n')
376
+ f.close()
377
+ output = sp.check_output(
378
+ [str(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'orca/orca')), 'node', '4', tmp_fname, 'std'])
379
+ output = output.decode('utf8').strip()
380
+ idx = output.find(COUNT_START_STR) + len(COUNT_START_STR) + 2
381
+ output = output[idx:]
382
+ node_orbit_counts = np.array([
383
+ list(map(int,
384
+ node_cnts.strip().split(' ')))
385
+ for node_cnts in output.strip('\n').split('\n')
386
+ ])
387
+
388
+ try:
389
+ os.remove(tmp_fname)
390
+ except OSError:
391
+ pass
392
+
393
+ return node_orbit_counts
394
+
395
+
396
+ def motif_stats(graph_ref_list, graph_pred_list, motif_type='4cycle', ground_truth_match=None,
397
+ bins=100, compute_emd=False):
398
+ # graph motif counts (int for each graph)
399
+ # normalized by graph size
400
+ total_counts_ref = []
401
+ total_counts_pred = []
402
+
403
+ num_matches_ref = []
404
+ num_matches_pred = []
405
+
406
+ graph_pred_list_remove_empty = [G for G in graph_pred_list if not G.number_of_nodes() == 0]
407
+ indices = motif_to_indices[motif_type]
408
+
409
+ for G in graph_ref_list:
410
+ orbit_counts = orca(G)
411
+ motif_counts = np.sum(orbit_counts[:, indices], axis=1)
412
+
413
+ if ground_truth_match is not None:
414
+ match_cnt = 0
415
+ for elem in motif_counts:
416
+ if elem == ground_truth_match:
417
+ match_cnt += 1
418
+ num_matches_ref.append(match_cnt / G.number_of_nodes())
419
+
420
+ # hist, _ = np.histogram(
421
+ # motif_counts, bins=bins, density=False)
422
+ motif_temp = np.sum(motif_counts) / G.number_of_nodes()
423
+ total_counts_ref.append(motif_temp)
424
+
425
+ for G in graph_pred_list_remove_empty:
426
+ orbit_counts = orca(G)
427
+ motif_counts = np.sum(orbit_counts[:, indices], axis=1)
428
+
429
+ if ground_truth_match is not None:
430
+ match_cnt = 0
431
+ for elem in motif_counts:
432
+ if elem == ground_truth_match:
433
+ match_cnt += 1
434
+ num_matches_pred.append(match_cnt / G.number_of_nodes())
435
+
436
+ motif_temp = np.sum(motif_counts) / G.number_of_nodes()
437
+ total_counts_pred.append(motif_temp)
438
+
439
+ total_counts_ref = np.array(total_counts_ref)[:, None]
440
+ total_counts_pred = np.array(total_counts_pred)[:, None]
441
+
442
+
443
+ if compute_emd:
444
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
445
+ # mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=emd, is_hist=False)
446
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
447
+ else:
448
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
449
+ return mmd_dist
450
+
451
+
452
+ def orbit_stats_all(graph_ref_list, graph_pred_list, compute_emd=False):
453
+ total_counts_ref = []
454
+ total_counts_pred = []
455
+
456
+ graph_pred_list_remove_empty = [
457
+ G for G in graph_pred_list if not G.number_of_nodes() == 0
458
+ ]
459
+
460
+ for G in graph_ref_list:
461
+ orbit_counts = orca(G)
462
+ orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
463
+ total_counts_ref.append(orbit_counts_graph)
464
+
465
+ for G in graph_pred_list:
466
+ orbit_counts = orca(G)
467
+ orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
468
+ total_counts_pred.append(orbit_counts_graph)
469
+
470
+ total_counts_ref = np.array(total_counts_ref)
471
+ total_counts_pred = np.array(total_counts_pred)
472
+
473
+ # mmd_dist = compute_mmd(
474
+ # total_counts_ref,
475
+ # total_counts_pred,
476
+ # kernel=gaussian,
477
+ # is_hist=False,
478
+ # sigma=30.0)
479
+
480
+ # mmd_dist = compute_mmd(
481
+ # total_counts_ref,
482
+ # total_counts_pred,
483
+ # kernel=gaussian_tv,
484
+ # is_hist=False,
485
+ # sigma=30.0)
486
+
487
+ if compute_emd:
488
+ # mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=emd, sigma=30.0)
489
+ # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
490
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False, sigma=30.0)
491
+ else:
492
+ mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian_tv, is_hist=False, sigma=30.0)
493
+ return mmd_dist
494
+
495
+
496
+ def eval_acc_lobster_graph(G_list):
497
+ G_list = [copy.deepcopy(gg) for gg in G_list]
498
+ count = 0
499
+ for gg in G_list:
500
+ if is_lobster_graph(gg):
501
+ count += 1
502
+ return count / float(len(G_list))
503
+
504
+
505
+ def eval_acc_tree_graph(G_list):
506
+ count = 0
507
+ for gg in G_list:
508
+ if nx.is_tree(gg):
509
+ count += 1
510
+ return count / float(len(G_list))
511
+
512
+
513
+ def eval_acc_grid_graph(G_list, grid_start=10, grid_end=20):
514
+ count = 0
515
+ for gg in G_list:
516
+ if is_grid_graph(gg):
517
+ count += 1
518
+ return count / float(len(G_list))
519
+
520
+
521
+ def eval_acc_sbm_graph(G_list, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000, is_parallel=True):
522
+ count = 0.0
523
+ if is_parallel:
524
+ with concurrent.futures.ThreadPoolExecutor() as executor:
525
+ for prob in executor.map(is_sbm_graph,
526
+ [gg for gg in G_list], [p_intra for i in range(len(G_list))],
527
+ [p_inter for i in range(len(G_list))],
528
+ [strict for i in range(len(G_list))],
529
+ [refinement_steps for i in range(len(G_list))]):
530
+ count += prob
531
+ else:
532
+ for gg in G_list:
533
+ count += is_sbm_graph(gg, p_intra=p_intra, p_inter=p_inter, strict=strict,
534
+ refinement_steps=refinement_steps)
535
+ return count / float(len(G_list))
536
+
537
+
538
+ def eval_acc_planar_graph(G_list):
539
+ count = 0
540
+ for gg in G_list:
541
+ if is_planar_graph(gg):
542
+ count += 1
543
+ return count / float(len(G_list))
544
+
545
+
546
+ def is_planar_graph(G):
547
+ return nx.is_connected(G) and nx.check_planarity(G)[0]
548
+
549
+
550
+ def is_lobster_graph(G):
551
+ """
552
+ Check a given graph is a lobster graph or not
553
+
554
+ Removing leaf nodes twice:
555
+
556
+ lobster -> caterpillar -> path
557
+
558
+ """
559
+ ### Check if G is a tree
560
+ if nx.is_tree(G):
561
+ G = G.copy()
562
+ ### Check if G is a path after removing leaves twice
563
+ leaves = [n for n, d in G.degree() if d == 1]
564
+ G.remove_nodes_from(leaves)
565
+
566
+ leaves = [n for n, d in G.degree() if d == 1]
567
+ G.remove_nodes_from(leaves)
568
+
569
+ num_nodes = len(G.nodes())
570
+ num_degree_one = [d for n, d in G.degree() if d == 1]
571
+ num_degree_two = [d for n, d in G.degree() if d == 2]
572
+
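+ # After pruning leaves twice, the remainder must be a simple path: either exactly two nodes of
+ # degree 1 with every other node of degree 2, or (for very small graphs) no nodes of degree 1 or 2 at all.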
573
+ if sum(num_degree_one) == 2 and sum(num_degree_two) == 2 * (num_nodes - 2):
574
+ return True
575
+ elif sum(num_degree_one) == 0 and sum(num_degree_two) == 0:
576
+ return True
577
+ else:
578
+ return False
579
+ else:
580
+ return False
581
+
582
+
583
+ def is_grid_graph(G):
584
+ """
585
+ Check if the graph is grid, by comparing with all the real grids with the same node count
586
+ """
587
+ all_grid_file = f"data/all_grids.pt"
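+ # All 2x2 up to 19x19 grid graphs are generated once, grouped by node count and cached in data/all_grids.pt.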
588
+ if os.path.isfile(all_grid_file):
589
+ all_grids = torch.load(all_grid_file)
590
+ else:
591
+ all_grids = {}
592
+ for i in range(2, 20):
593
+ for j in range(2, 20):
594
+ G_grid = nx.grid_2d_graph(i, j)
595
+ n_nodes = f"{len(G_grid.nodes())}"
596
+ all_grids[n_nodes] = all_grids.get(n_nodes, []) + [G_grid]
597
+ torch.save(all_grids, all_grid_file)
598
+
599
+ n_nodes = f"{len(G.nodes())}"
600
+ if n_nodes in all_grids:
601
+ for G_grid in all_grids[n_nodes]:
602
+ if nx.faster_could_be_isomorphic(G, G_grid):
603
+ if nx.is_isomorphic(G, G_grid):
604
+ return True
605
+ return False
606
+ else:
607
+ return False
608
+
609
+
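+ # NOTE: is_sbm_graph below is commented out because it relies on graph_tool (see the commented import at
+ # the top of this file); the 'sbm' metric path (eval_acc_sbm_graph and the is_sbm_graph reference further
+ # down) therefore requires graph_tool to be installed and this block re-enabled.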
610
+ # def is_sbm_graph(G, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000):
611
+ # """
612
+ # Check if how closely given graph matches a SBM with given probabilites by computing mean probability of Wald test statistic for each recovered parameter
613
+ # """
614
+
615
+ # adj = nx.adjacency_matrix(G).toarray()
616
+ # idx = adj.nonzero()
617
+ # g = gt.Graph()
618
+ # g.add_edge_list(np.transpose(idx))
619
+ # try:
620
+ # state = gt.minimize_blockmodel_dl(g)
621
+ # except ValueError:
622
+ # if strict:
623
+ # return False
624
+ # else:
625
+ # return 0.0
626
+
627
+ # # Refine using merge-split MCMC
628
+ # for i in range(refinement_steps):
629
+ # state.multiflip_mcmc_sweep(beta=np.inf, niter=10)
630
+
631
+ # b = state.get_blocks()
632
+ # b = gt.contiguous_map(state.get_blocks())
633
+ # state = state.copy(b=b)
634
+ # e = state.get_matrix()
635
+ # n_blocks = state.get_nonempty_B()
636
+ # node_counts = state.get_nr().get_array()[:n_blocks]
637
+ # edge_counts = e.todense()[:n_blocks, :n_blocks]
638
+ # if strict:
639
+ # if (node_counts > 40).sum() > 0 or (node_counts < 20).sum() > 0 or n_blocks > 5 or n_blocks < 2:
640
+ # return False
641
+
642
+ # max_intra_edges = node_counts * (node_counts - 1)
643
+ # est_p_intra = np.diagonal(edge_counts) / (max_intra_edges + 1e-6)
644
+
645
+ # max_inter_edges = node_counts.reshape((-1, 1)) @ node_counts.reshape((1, -1))
646
+ # np.fill_diagonal(edge_counts, 0)
647
+ # est_p_inter = edge_counts / (max_inter_edges + 1e-6)
648
+
649
+ # W_p_intra = (est_p_intra - p_intra) ** 2 / (est_p_intra * (1 - est_p_intra) + 1e-6)
650
+ # W_p_inter = (est_p_inter - p_inter) ** 2 / (est_p_inter * (1 - est_p_inter) + 1e-6)
651
+
652
+ # W = W_p_inter.copy()
653
+ # np.fill_diagonal(W, W_p_intra)
654
+ # p = 1 - chi2.cdf(abs(W), 1)
655
+ # p = p.mean()
656
+ # if strict:
657
+ # return p > 0.9 # p value < 10 %
658
+ # else:
659
+ # return p
660
+
661
+
662
+ def eval_fraction_isomorphic(fake_graphs, train_graphs):
663
+ count = 0
664
+ for fake_g in fake_graphs:
665
+ for train_g in train_graphs:
666
+ if nx.faster_could_be_isomorphic(fake_g, train_g):
667
+ if nx.is_isomorphic(fake_g, train_g):
668
+ count += 1
669
+ break
670
+ return count / float(len(fake_graphs))
671
+
672
+
673
+ def eval_fraction_unique(fake_graphs, precise=False):
674
+ count_non_unique = 0
675
+ fake_evaluated = []
676
+ for fake_g in fake_graphs:
677
+ unique = True
678
+ if not fake_g.number_of_nodes() == 0:
679
+ for fake_old in fake_evaluated:
680
+ if precise:
681
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
682
+ if nx.is_isomorphic(fake_g, fake_old):
683
+ count_non_unique += 1
684
+ unique = False
685
+ break
686
+ else:
687
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
688
+ if nx.could_be_isomorphic(fake_g, fake_old):
689
+ count_non_unique += 1
690
+ unique = False
691
+ break
692
+ if unique:
693
+ fake_evaluated.append(fake_g)
694
+
695
+ frac_unique = (float(len(fake_graphs)) - count_non_unique) / float(
696
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs
697
+
698
+ return frac_unique
699
+
700
+
701
+ def eval_fraction_unique_non_isomorphic_valid(fake_graphs, train_graphs, validity_func=(lambda x: True)):
702
+ count_valid = 0
703
+ count_isomorphic = 0
704
+ count_non_unique = 0
705
+ fake_evaluated = []
706
+ for fake_g in fake_graphs:
707
+ unique = True
708
+
709
+ for fake_old in fake_evaluated:
710
+ if nx.faster_could_be_isomorphic(fake_g, fake_old):
711
+ if nx.is_isomorphic(fake_g, fake_old):
712
+ count_non_unique += 1
713
+ unique = False
714
+ break
715
+ if unique:
716
+ fake_evaluated.append(fake_g)
717
+ non_isomorphic = True
718
+ for train_g in train_graphs:
719
+ if nx.faster_could_be_isomorphic(fake_g, train_g):
720
+ if nx.is_isomorphic(fake_g, train_g):
721
+ count_isomorphic += 1
722
+ non_isomorphic = False
723
+ break
724
+ if non_isomorphic:
725
+ if validity_func(fake_g):
726
+ count_valid += 1
727
+
728
+ frac_unique = (float(len(fake_graphs)) - count_non_unique) / float(
729
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs
730
+ frac_unique_non_isomorphic = (float(len(fake_graphs)) - count_non_unique - count_isomorphic) / float(
731
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set
732
+ frac_unique_non_isomorphic_valid = count_valid / float(
733
+ len(fake_graphs)) # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set and are valid
734
+ return frac_unique, frac_unique_non_isomorphic, frac_unique_non_isomorphic_valid
735
+
736
+
737
+ class SpectreSamplingMetrics(nn.Module):
738
+ def __init__(self, data_loaders, compute_emd, metrics_list):
739
+ super().__init__()
740
+
741
+ self.train_graphs = self.loader_to_nx(data_loaders['train'])
742
+ self.val_graphs = self.loader_to_nx(data_loaders['val'])
743
+ self.test_graphs = self.loader_to_nx(data_loaders['test'])
744
+ self.num_graphs_test = len(self.test_graphs)
745
+ self.num_graphs_val = len(self.val_graphs)
746
+ self.compute_emd = compute_emd
747
+ self.metrics_list = metrics_list
748
+
749
+ def loader_to_nx(self, loader):
750
+ networkx_graphs = []
751
+ for i, batch in enumerate(loader):
752
+ data_list = batch.to_data_list()
753
+ for j, data in enumerate(data_list):
754
+ networkx_graphs.append(to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True,
755
+ remove_self_loops=True))
756
+ return networkx_graphs
757
+
758
+ def forward(self, generated_graphs: list, local_rank, test=False):
759
+ reference_graphs = self.test_graphs if test else self.val_graphs
760
+ if local_rank == 0:
761
+ print(f"Computing sampling metrics between {len(generated_graphs)} generated graphs and {len(reference_graphs)}"
762
+ f" test graphs -- emd computation: {self.compute_emd}")
763
+ networkx_graphs = []
764
+ adjacency_matrices = []
765
+ if local_rank == 0:
766
+ print("Building networkx graphs...")
767
+ for graph in generated_graphs:
768
+ node_types, edge_types = graph
769
+ A = edge_types.bool().cpu().numpy()
770
+ adjacency_matrices.append(A)
771
+
772
+ nx_graph = nx.from_numpy_array(A)
773
+ networkx_graphs.append(nx_graph)
774
+
775
+ np.savez('generated_adjs.npz', *adjacency_matrices)
776
+
777
+ to_log = {}
778
+ if 'degree' in self.metrics_list:
779
+ if local_rank == 0:
780
+ print("Computing degree stats..")
781
+ degree = degree_stats(reference_graphs, networkx_graphs, is_parallel=True,
782
+ compute_emd=self.compute_emd)
783
+
784
+ to_log['degree'] = degree
785
+
786
+ if wandb.run:
787
+ wandb.run.summary['degree'] = degree
788
+
789
+ # val_eigvals = [graph["eigval"][1:self.k + 1].cpu().detach().numpy() for graph in self.val]
790
+ # train_eigvals = [graph["eigval"][1:self.k + 1].cpu().detach().numpy() for graph in self.train]
791
+
792
+ # eigval_stats(eig_ref_list, eig_pred_list, max_eig=20, is_parallel=True, compute_emd=False)
793
+ # spectral_filter_stats(eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list, is_parallel=False,
794
+ # compute_emd=False) # This is the one called wavelet
795
+
796
+
797
+ if 'spectre' in self.metrics_list:
798
+ if local_rank == 0:
799
+ print("Computing spectre stats...")
800
+ spectre = spectral_stats(reference_graphs, networkx_graphs, is_parallel=True, n_eigvals=-1,
801
+ compute_emd=self.compute_emd)
802
+
803
+ to_log['spectre'] = spectre
804
+ if wandb.run:
805
+ wandb.run.summary['spectre'] = spectre
806
+
807
+ if 'clustering' in self.metrics_list:
808
+ if local_rank == 0:
809
+ print("Computing clustering stats...")
810
+ clustering = clustering_stats(reference_graphs, networkx_graphs, bins=100, is_parallel=True,
811
+ compute_emd=self.compute_emd)
812
+ to_log['clustering'] = clustering
813
+ if wandb.run:
814
+ wandb.run.summary['clustering'] = clustering
815
+
816
+ if 'motif' in self.metrics_list:
817
+ if local_rank == 0:
818
+ print("Computing motif stats")
819
+ motif = motif_stats(reference_graphs, networkx_graphs, motif_type='4cycle', ground_truth_match=None, bins=100,
820
+ compute_emd=self.compute_emd)
821
+ to_log['motif'] = motif
822
+ if wandb.run:
823
+ wandb.run.summary['motif'] = motif
824
+
825
+ if 'orbit' in self.metrics_list:
826
+ if local_rank == 0:
827
+ print("Computing orbit stats...")
828
+ orbit = orbit_stats_all(reference_graphs, networkx_graphs, compute_emd=self.compute_emd)
829
+ to_log['orbit'] = orbit
830
+ if wandb.run:
831
+ wandb.run.summary['orbit'] = orbit
832
+
833
+ if 'sbm' in self.metrics_list:
834
+ if local_rank == 0:
835
+ print("Computing accuracy...")
836
+ acc = eval_acc_sbm_graph(networkx_graphs, refinement_steps=100, strict=True)
837
+ to_log['sbm_acc'] = acc
838
+ if wandb.run:
839
+ wandb.run.summary['sbmacc'] = acc
840
+
841
+ if 'planar' in self.metrics_list:
842
+ if local_rank ==0:
843
+ print('Computing planar accuracy...')
844
+ planar_acc = eval_acc_planar_graph(networkx_graphs)
845
+ to_log['planar_acc'] = planar_acc
846
+ if wandb.run:
847
+ wandb.run.summary['planar_acc'] = planar_acc
848
+
849
+ if 'sbm' in self.metrics_list or 'planar' in self.metrics_list:
850
+ if local_rank == 0:
851
+ print("Computing all fractions...")
852
+ frac_unique, frac_unique_non_isomorphic, fraction_unique_non_isomorphic_valid = eval_fraction_unique_non_isomorphic_valid(
853
+ networkx_graphs, self.train_graphs, is_sbm_graph if 'sbm' in self.metrics_list else is_planar_graph)
854
+ frac_non_isomorphic = 1.0 - eval_fraction_isomorphic(networkx_graphs, self.train_graphs)
855
+ to_log.update({'sampling/frac_unique': frac_unique,
856
+ 'sampling/frac_unique_non_iso': frac_unique_non_isomorphic,
857
+ 'sampling/frac_unic_non_iso_valid': fraction_unique_non_isomorphic_valid,
858
+ 'sampling/frac_non_iso': frac_non_isomorphic})
859
+
860
+ if local_rank == 0:
861
+ print("Sampling statistics", to_log)
862
+ if wandb.run:
863
+ wandb.log(to_log, commit=False)
864
+
865
+ def reset(self):
866
+ pass
867
+
868
+
869
+ def loader_to_nx(loader):
870
+ networkx_graphs = {}
871
+ for i, batch in enumerate(loader):
872
+ data_list = batch.to_data_list()
873
+ for j, data in enumerate(data_list):
874
+ networkx_graphs[data.prompt_id.squeeze(0).item()] = [to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True, remove_self_loops=True)]
875
+
876
+ return networkx_graphs
877
+
878
+ def compute_metrics(generated_graphs, referenced_graphs):
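+ # Flattens the per-prompt dictionaries of generated and reference graphs into flat lists; as written it
+ # only converts the generated graphs to networkx and returns them, without computing any distances yet.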
879
+ networkx_graphs = defaultdict(list)
880
+ adjacency_matrices = defaultdict(list)
881
+ for key in generated_graphs:
882
+ for graph in generated_graphs[key]:
883
+ node_types, edge_types = graph
884
+ A = edge_types.bool().cpu().numpy()
885
+ nx_graph = nx.from_numpy_array(A)
886
+
887
+ networkx_graphs[key].append(nx_graph)
888
+ adjacency_matrices[key].append(A)
889
+
890
+ new_referenced_graphs = []
891
+ for key in referenced_graphs:
892
+ new_referenced_graphs.extend(referenced_graphs[key])
893
+ referenced_graphs = new_referenced_graphs
894
+
895
+ nx_graphs = []
896
+ for key in networkx_graphs:
897
+ nx_graphs.extend(networkx_graphs[key])
898
+
899
+ return nx_graphs
900
+
901
+
902
+
903
+
904
+ class Comm20SamplingMetrics(SpectreSamplingMetrics):
905
+ def __init__(self, data_loaders):
906
+ super().__init__(data_loaders=data_loaders,
907
+ compute_emd=True,
908
+ metrics_list=['degree', 'clustering', 'orbit'])
909
+
910
+
911
+ class PlanarSamplingMetrics(SpectreSamplingMetrics):
912
+ def __init__(self, data_loaders):
913
+ super().__init__(data_loaders=data_loaders,
914
+ compute_emd=False,
915
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'planar'])
916
+
917
+
918
+ class SBMSamplingMetrics(SpectreSamplingMetrics):
919
+ def __init__(self, data_loaders):
920
+ super().__init__(data_loaders=data_loaders,
921
+ compute_emd=False,
922
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'sbm'])
923
+
924
+ class CrossDomainSamplingMetrics(SpectreSamplingMetrics):
925
+ def __init__(self, data_loaders):
926
+ super().__init__(data_loaders=data_loaders,
927
+ compute_emd=False,
928
+ metrics_list=['degree', 'clustering', 'orbit', 'spectre'])
analysis/visualization.py ADDED
@@ -0,0 +1,221 @@
1
+ import os
2
+
3
+ from rdkit import Chem
4
+ from rdkit.Chem import Draw, AllChem
5
+ from rdkit.Geometry import Point3D
6
+ from rdkit import RDLogger
7
+ import imageio
8
+ import networkx as nx
9
+ import numpy as np
10
+ import rdkit.Chem
11
+ import wandb
12
+ import matplotlib.pyplot as plt
13
+
14
+
15
+
16
+
17
+
18
+ class MolecularVisualization:
19
+ def __init__(self, remove_h, dataset_infos):
20
+ self.remove_h = remove_h
21
+ self.dataset_infos = dataset_infos
22
+
23
+ def mol_from_graphs(self, node_list, adjacency_matrix):
24
+ """
25
+ Convert graphs to rdkit molecules
26
+ node_list: the nodes of a batch of nodes (bs x n)
27
+ adjacency_matrix: the adjacency_matrix of the molecule (bs x n x n)
28
+ """
29
+ # dictionary to map integer value to the char of atom
30
+ atom_decoder = self.dataset_infos.atom_decoder
31
+
32
+ # create empty editable mol object
33
+ mol = Chem.RWMol()
34
+
35
+ # add atoms to mol and keep track of index
36
+ node_to_idx = {}
37
+ for i in range(len(node_list)):
38
+ if node_list[i] == -1:
39
+ continue
40
+ a = Chem.Atom(atom_decoder[int(node_list[i])])
41
+ molIdx = mol.AddAtom(a)
42
+ node_to_idx[i] = molIdx
43
+
44
+ for ix, row in enumerate(adjacency_matrix):
45
+ for iy, bond in enumerate(row):
46
+ # only traverse half the symmetric matrix
47
+ if iy <= ix:
48
+ continue
49
+ if bond == 1:
50
+ bond_type = Chem.rdchem.BondType.SINGLE
51
+ elif bond == 2:
52
+ bond_type = Chem.rdchem.BondType.DOUBLE
53
+ elif bond == 3:
54
+ bond_type = Chem.rdchem.BondType.TRIPLE
55
+ elif bond == 4:
56
+ bond_type = Chem.rdchem.BondType.AROMATIC
57
+ else:
58
+ continue
59
+ mol.AddBond(node_to_idx[ix], node_to_idx[iy], bond_type)
60
+
61
+ try:
62
+ mol = mol.GetMol()
63
+ except rdkit.Chem.KekulizeException:
64
+ print("Can't kekulize molecule")
65
+ mol = None
66
+ return mol
67
+
68
+ def visualize(self, path: str, molecules: list, num_molecules_to_visualize: int, log='graph'):
69
+ # define path to save figures
70
+ if not os.path.exists(path):
71
+ os.makedirs(path)
72
+
73
+ # visualize the final molecules
74
+ print(f"Visualizing {num_molecules_to_visualize} of {len(molecules)}")
75
+ if num_molecules_to_visualize > len(molecules):
76
+ print(f"Shortening to {len(molecules)}")
77
+ num_molecules_to_visualize = len(molecules)
78
+
79
+ for i in range(num_molecules_to_visualize):
80
+ file_path = os.path.join(path, 'molecule_{}.png'.format(i))
81
+ mol = self.mol_from_graphs(molecules[i][0].numpy(), molecules[i][1].numpy())
82
+ try:
83
+ Draw.MolToFile(mol, file_path)
84
+ if wandb.run and log is not None:
85
+ print(f"Saving {file_path} to wandb")
86
+ wandb.log({log: wandb.Image(file_path)}, commit=True)
87
+ except rdkit.Chem.KekulizeException:
88
+ print("Can't kekulize molecule")
89
+
90
+
91
+ def visualize_chain(self, path, nodes_list, adjacency_matrix, trainer=None):
92
+ RDLogger.DisableLog('rdApp.*')
93
+ # convert graphs to the rdkit molecules
94
+ mols = [self.mol_from_graphs(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])]
95
+
96
+ # find the coordinates of atoms in the final molecule
97
+ final_molecule = mols[-1]
98
+ AllChem.Compute2DCoords(final_molecule)
99
+
100
+ coords = []
101
+ for i, atom in enumerate(final_molecule.GetAtoms()):
102
+ positions = final_molecule.GetConformer().GetAtomPosition(i)
103
+ coords.append((positions.x, positions.y, positions.z))
104
+
105
+ # align all the molecules
106
+ for i, mol in enumerate(mols):
107
+ AllChem.Compute2DCoords(mol)
108
+ conf = mol.GetConformer()
109
+ for j, atom in enumerate(mol.GetAtoms()):
110
+ x, y, z = coords[j]
111
+ conf.SetAtomPosition(j, Point3D(x, y, z))
112
+
113
+ # draw gif
114
+ save_paths = []
115
+ num_frams = nodes_list.shape[0]
116
+
117
+ for frame in range(num_frams):
118
+ file_name = os.path.join(path, 'fram_{}.png'.format(frame))
119
+ Draw.MolToFile(mols[frame], file_name, size=(300, 300), legend=f"Frame {frame}")
120
+ save_paths.append(file_name)
121
+
122
+ imgs = [imageio.imread(fn) for fn in save_paths]
123
+ gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1]))
124
+ imgs.extend([imgs[-1]] * 10)
125
+ imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20)
126
+
127
+ if wandb.run:
128
+ print(f"Saving {gif_path} to wandb")
129
+ wandb.log({"chain": wandb.Video(gif_path, fps=5, format="gif")}, commit=True)
130
+
131
+ # draw grid image
132
+ try:
133
+ img = Draw.MolsToGridImage(mols, molsPerRow=10, subImgSize=(200, 200))
134
+ img.save(os.path.join(path, '{}_grid_image.png'.format(path.split('/')[-1])))
135
+ except Chem.rdchem.KekulizeException:
136
+ print("Can't kekulize molecule")
137
+ return mols
138
+
139
+
140
+ class NonMolecularVisualization:
141
+ def to_networkx(self, node_list, adjacency_matrix):
142
+ """
143
+ Convert a graph to a networkx graph
144
+ node_list: the node labels of the graph (n)
145
+ adjacency_matrix: the adjacency matrix of the graph (n x n)
146
+ """
147
+ graph = nx.Graph()
148
+
149
+ for i in range(len(node_list)):
150
+ if node_list[i] == -1:
151
+ continue
152
+ graph.add_node(i, number=i, symbol=node_list[i], color_val=node_list[i])
153
+
154
+ rows, cols = np.where(adjacency_matrix >= 1)
155
+ edges = zip(rows.tolist(), cols.tolist())
156
+ for edge in edges:
157
+ edge_type = adjacency_matrix[edge[0]][edge[1]]
158
+ graph.add_edge(edge[0], edge[1], color=float(edge_type), weight=3 * edge_type)
159
+
160
+ return graph
161
+
162
+ def visualize_non_molecule(self, graph, pos, path, iterations=100, node_size=100, largest_component=False):
163
+ if largest_component:
164
+ CGs = [graph.subgraph(c) for c in nx.connected_components(graph)]
165
+ CGs = sorted(CGs, key=lambda x: x.number_of_nodes(), reverse=True)
166
+ graph = CGs[0]
167
+
168
+ # Plot the graph structure with colors
169
+ if pos is None:
170
+ pos = nx.spring_layout(graph, iterations=iterations)
171
+
172
+ # Set node colors based on the eigenvectors
173
+ w, U = np.linalg.eigh(nx.normalized_laplacian_matrix(graph).toarray())
174
+ vmin, vmax = np.min(U[:, 1]), np.max(U[:, 1])
175
+ m = max(np.abs(vmin), vmax)
176
+ vmin, vmax = -m, m
177
+
178
+ plt.figure()
179
+ nx.draw(graph, pos, font_size=5, node_size=node_size, with_labels=False, node_color=U[:, 1],
180
+ cmap=plt.cm.coolwarm, vmin=vmin, vmax=vmax, edge_color='grey')
181
+
182
+ plt.tight_layout()
183
+ plt.savefig(path)
184
+ plt.close("all")
185
+
186
+ def visualize(self, path: str, graphs: list, num_graphs_to_visualize: int, log='graph'):
187
+ # define path to save figures
188
+ if not os.path.exists(path):
189
+ os.makedirs(path)
190
+
191
+ # visualize the final molecules
192
+ for i in range(num_graphs_to_visualize):
193
+ file_path = os.path.join(path, 'graph_{}.png'.format(i))
194
+ graph = self.to_networkx(graphs[i][0].numpy(), graphs[i][1].numpy())
195
+ self.visualize_non_molecule(graph=graph, pos=None, path=file_path)
196
+ im = plt.imread(file_path)
197
+ if wandb.run and log is not None:
198
+ wandb.log({log: [wandb.Image(im, caption=file_path)]})
199
+
200
+ def visualize_chain(self, path, nodes_list, adjacency_matrix):
201
+ # convert graphs to networkx
202
+ graphs = [self.to_networkx(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])]
203
+ # find the coordinates of atoms in the final molecule
204
+ final_graph = graphs[-1]
205
+ final_pos = nx.spring_layout(final_graph, seed=0)
206
+
207
+ # draw gif
208
+ save_paths = []
209
+ num_frams = nodes_list.shape[0]
210
+
211
+ for frame in range(num_frams):
212
+ file_name = os.path.join(path, 'fram_{}.png'.format(frame))
213
+ self.visualize_non_molecule(graph=graphs[frame], pos=final_pos, path=file_name)
214
+ save_paths.append(file_name)
215
+
216
+ imgs = [imageio.imread(fn) for fn in save_paths]
217
+ gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1]))
218
+ imgs.extend([imgs[-1]] * 10)
219
+ imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20)
220
+ if wandb.run:
221
+ wandb.log({'chain': [wandb.Video(gif_path, caption=gif_path, format="gif")]})
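For readers unfamiliar with the integer encoding used by mol_from_graphs above, here is a minimal standalone sketch of the same node/bond-order convention (a hypothetical two-atom example, assuming RDKit is installed; not part of the uploaded code):

from rdkit import Chem

# hypothetical example: integers index into an atom_decoder list, adjacency values are bond orders
atom_decoder = ['C', 'N', 'O']
node_list = [0, 2]                                           # C, O
mol = Chem.RWMol()
idx = [mol.AddAtom(Chem.Atom(atom_decoder[a])) for a in node_list]
mol.AddBond(idx[0], idx[1], Chem.rdchem.BondType.DOUBLE)    # adjacency value 2 -> double bond
print(Chem.MolToSmiles(mol.GetMol()))                        # C=O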
app.py ADDED
@@ -0,0 +1,89 @@
1
+ from omegaconf import OmegaConf
2
+ import gradio as gr
3
+
4
+ from dataset import init_dataset, compute_input_output_dims
5
+ from extra_features import ExtraFeatures
6
+ from demo_model import LGGMText2Graph_Demo
7
+ from analysis.spectre_utils import CrossDomainSamplingMetrics
8
+ import networkx as nx
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+
12
+
13
+ cfg = OmegaConf.load('./config.yaml')
14
+ hydra_path = '.'
15
+
16
+
17
+ data_loaders, num_classes, max_n_nodes, nodes_dist, edge_types, node_types, n_nodes, cond_dims, cond_emb = init_dataset(cfg.dataset.name, cfg.train.batch_size, hydra_path, cfg.general.condition, cfg.model.transition)
18
+
19
+ extra_features = ExtraFeatures(cfg.model.extra_features, max_n_nodes)
20
+
21
+ input_dims, output_dims = compute_input_output_dims(data_loaders['train'], extra_features)
22
+
23
+ sampling_metrics = CrossDomainSamplingMetrics(data_loaders)
24
+
25
+ model = LGGMText2Graph_Demo.load_from_checkpoint('last-v1.ckpt')
26
+
27
+ model.init_prompt_encoder()
28
+
29
+ def calculate_average_degree(graph):
30
+ num_nodes = graph.number_of_nodes()
31
+ num_edges = graph.number_of_edges()
32
+ return (2 * num_edges) / num_nodes if num_nodes > 0 else 0
33
+
34
+
35
+ def predict(text, num_nodes = None):
36
+ # Assuming model.generate and other processes are defined as before
37
+ graphs = model.generate(text, int(num_nodes))
38
+ ccs = []
39
+ degs = []
40
+ images = []
41
+
42
+ for g in graphs:
43
+ ccs.append(nx.average_clustering(g))
44
+ degs.append(calculate_average_degree(g))
45
+
46
+ fig, ax = plt.subplots()
47
+ nx.draw(g, ax=ax)
48
+ fig.canvas.draw()
49
+ image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
50
+ image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
51
+ plt.close(fig)
52
+
53
+ images.append(image)
54
+
55
+ return images[0], images[1], images[2], images[3], images[4], ccs[0], ccs[1], ccs[2], ccs[3], ccs[4], degs[0], degs[1], degs[2], degs[3], degs[4]
56
+
57
+ def clear(input_text):
58
+ return [None] * 15  # one None per output component (5 images + 5 CC boxes + 5 DEG boxes)
59
+
60
+
61
+ with gr.Blocks() as demo:
62
+ gr.Markdown("## Text2Graph Generation Demo")
63
+ with gr.Row():
64
+ with gr.Column():
65
+ input_text = gr.Textbox(label="Input your text prompt here", placeholder="Type here...")
66
+ with gr.Column():
67
+ input_num = gr.Slider(5, 200, value=10, label="Count", info="Number of nodes in the graph to be generated")
68
+ with gr.Column():
69
+ gr.Markdown("### Suggested Prompts")
70
+ gr.Markdown("1. Create a complex network with high clustering coefficient.\n2. Create a graph with extremely low number of triangles.")
71
+
72
+ with gr.Row() as output_row:
73
+ output_images = [gr.Image(label = f"Generated Network #{_}") for _ in range(5)]
74
+ with gr.Row():
75
+ output_texts_cc = [gr.Textbox(label=f"CC #{_}") for _ in range(5)]
76
+ with gr.Row():
77
+ output_texts_deg = [gr.Textbox(label=f"DEG #{_}") for _ in range(5)]
78
+
79
+ with gr.Row():
80
+ submit_button = gr.Button("Submit")
81
+ clear_button = gr.Button("Clear")
82
+
83
+ # Change function is linked to the submit button
84
+ submit_button.click(fn=predict, inputs=[input_text, input_num], outputs=output_images + output_texts_cc + output_texts_deg)
85
+
86
+ # Clear function resets the text input and clears the outputs
87
+ clear_button.click(fn=clear, inputs=input_text, outputs=output_images + output_texts_cc + output_texts_deg)
88
+
89
+ demo.launch()
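The CC and DEG boxes report the average clustering coefficient and average degree of each generated graph. A small self-contained sketch of those statistics on a stand-in networkx graph (a random graph used purely for illustration, not a model sample):

import networkx as nx

g = nx.erdos_renyi_graph(n=10, p=0.3, seed=0)   # stand-in for a generated sample
cc = nx.average_clustering(g)
deg = 2 * g.number_of_edges() / g.number_of_nodes()
print(f"CC={cc:.3f}, DEG={deg:.3f}")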
config.yaml ADDED
@@ -0,0 +1,53 @@
1
+ hydra:
2
+ job:
3
+ chdir: True
4
+ run:
5
+ dir: ../outputs/${general.name}
6
+ general:
7
+ name: 'cc_high'
8
+ wandb: 'online' # online | offline | disabled
9
+ gpus: 1
10
+ resume: null
11
+ test_only: null
12
+ sample_every_val: 4
13
+ check_val_every_n_epochs: 10
14
+ samples_to_generate: 100
15
+ samples_to_save: 3
16
+ chains_to_save: 1
17
+ log_every_steps: 50
18
+ number_chain_steps: 8
19
+ final_model_samples_to_generate: 100
20
+ final_model_samples_to_save: 30
21
+ final_model_chains_to_save: 20
22
+ condition: ' '
23
+ setting: 'train_scratch'
24
+ ckpt_path: null
25
+ model:
26
+ type: 'discrete'
27
+ transition: 'marginal'
28
+ model: 'graph_tf'
29
+ diffusion_steps: 500
30
+ diffusion_noise_schedule: 'cosine'
31
+ n_layers: 5
32
+ extra_features: 'all'
33
+ hidden_mlp_dims: {'X': 256, 'E': 128, 'y': 128}
34
+ hidden_dims: {'dx': 256, 'de': 64, 'dy': 64, 'n_head': 8, 'dim_ffX': 256, 'dim_ffE': 128, 'dim_ffy': 128}
35
+ lambda_train: [5, 0]
36
+ train:
37
+ n_epochs: 300
38
+ batch_size: 8
39
+ accumulate_grad_batches: 1
40
+ lr: 0.0002
41
+ clip_grad: null
42
+ save_model: True
43
+ num_workers: 0
44
+ ema_decay: 0
45
+ weight_decay: 1e-12
46
+ seed: 0
47
+ progress_bar: false
48
+ optimizer: adamw
49
+ dataset:
50
+ datadir: 'graph/'
51
+ name: cc_high
52
+ remove_h: null
53
+ sample: 'seed'
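A minimal sketch of reading this file with OmegaConf, matching the way app.py loads it (the override at the end is only an illustration, not a setting used by the upload):

from omegaconf import OmegaConf

cfg = OmegaConf.load('./config.yaml')
print(cfg.model.diffusion_steps)      # 500
print(cfg.model.hidden_dims.dx)       # 256

# hypothetical override, merged the same way Hydra would apply a dotlist
cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(["train.batch_size=16"]))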
dataset.py ADDED
@@ -0,0 +1,395 @@
1
+ import torch
2
+ from distributions import DistributionNodes
3
+ from utils import to_dense
4
+ from torch_geometric.loader import DataLoader
5
+ from torch_geometric.data import Data
6
+ from torch_geometric.utils import remove_self_loops, to_undirected
7
+ import os
8
+ from sentence_transformers import SentenceTransformer
9
+ import random
10
+
11
+
12
+ def arrange_data(adj_matrix, cond_emb, ind):
13
+ n_nodes = adj_matrix.shape[0]
14
+
15
+ edge_index = adj_matrix.nonzero().t()
16
+ edge_attr = torch.tensor([[0, 1] for _ in range(edge_index.shape[1])])
17
+
18
+ edge_index, edge_attr = to_undirected(edge_index, edge_attr, n_nodes, reduce = 'mean')
19
+ edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
20
+
21
+ x = torch.ones((n_nodes, 1))
22
+
23
+ y = torch.empty(1, 0)
24
+ cond_emb = torch.tensor(cond_emb).unsqueeze(0)
25
+
26
+ return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, prompt_id = torch.tensor(ind), cond_emb = cond_emb)
27
+
28
+
29
+
30
+ def load_dataset_cc(dataname, batch_size, hydra_path, condition):
31
+ domains = ['cc_high', 'cc_medium', 'cc_low']
32
+
33
+
34
+ model = SentenceTransformer("all-MiniLM-L6-v2")
35
+ cond_embs = model.encode(condition)
36
+
37
+ for domain in domains:
38
+ if not os.path.exists(f'{hydra_path}/graphs/{domain}/train.pt'):
39
+
40
+ data = torch.load(f'{hydra_path}/graphs/{domain}/{domain}.pt')
41
+
42
+ #fix seed
43
+ torch.manual_seed(0)
44
+
45
+ #random permute and split
46
+ n = len(data)
47
+ indices = torch.randperm(n)
48
+
49
+ if domain == 'eco':
50
+ train_indices = indices[:4].repeat(50)
51
+ val_indices = indices[4:5].repeat(50)
52
+ test_indices = indices[5:]
53
+ else:
54
+ train_indices = indices[:int(0.7 * n)]
55
+ val_indices = indices[int(0.7 * n):int(0.8 * n)]
56
+ test_indices = indices[int(0.8 * n):]
57
+
58
+ train_data = [data[_] for _ in train_indices]
59
+ val_data = [data[_] for _ in val_indices]
60
+ test_data = [data[_] for _ in test_indices]
61
+
62
+ torch.save(train_indices, f'{hydra_path}/graphs/{domain}/train_indices.pt')
63
+ torch.save(val_indices, f'{hydra_path}/graphs/{domain}/val_indices.pt')
64
+ torch.save(test_indices, f'{hydra_path}/graphs/{domain}/test_indices.pt')
65
+
66
+ torch.save(train_data, f'{hydra_path}/graphs/{domain}/train.pt')
67
+ torch.save(val_data, f'{hydra_path}/graphs/{domain}/val.pt')
68
+ torch.save(test_data, f'{hydra_path}/graphs/{domain}/test.pt')
69
+
70
+
71
+ train_data, val_data, test_data = [], [], []
72
+
73
+ if dataname in domains: #only for test
74
+ train_d = torch.load(f'{hydra_path}/graphs/{dataname}/train.pt')
75
+ val_d = torch.load(f'{hydra_path}/graphs/{dataname}/val.pt')
76
+ test_d = torch.load(f'{hydra_path}/graphs/{dataname}/test.pt')
77
+
78
+ train_indices = torch.load(f'{hydra_path}/graphs/{dataname}/train_indices.pt')
79
+ val_indices = torch.load(f'{hydra_path}/graphs/{dataname}/val_indices.pt')
80
+ test_indices = torch.load(f'{hydra_path}/graphs/{dataname}/test_indices.pt')
81
+
82
+ with open(f'{hydra_path}/graphs/{dataname}/text_prompt_order.txt', 'r') as f:
83
+ text_prompt = f.readlines()
84
+ text_prompt = [x.strip() for x in text_prompt]
85
+
86
+ # text_prompt = ['1111111shgowhgo234o234']*10000
87
+ print(text_prompt[0])
88
+ text_embs = model.encode(text_prompt)
89
+ cond_embs = torch.tensor(text_embs)
90
+
91
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
92
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
93
+
94
+
95
+ if dataname != 'eco':
96
+ # test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
97
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
98
+ else:
99
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
100
+
101
+
102
+ elif dataname == 'all':
103
+ for i, domain in enumerate(domains):
104
+ train_d = torch.load(f'{hydra_path}/graphs/{domain}/train.pt')
105
+ val_d = torch.load(f'{hydra_path}/graphs/{domain}/val.pt')
106
+ test_d = torch.load(f'{hydra_path}/graphs/{domain}/test.pt')
107
+
108
+ train_indices = torch.load(f'{hydra_path}/graphs/{domain}/train_indices.pt')
109
+ val_indices = torch.load(f'{hydra_path}/graphs/{domain}/val_indices.pt')
110
+ test_indices = torch.load(f'{hydra_path}/graphs/{domain}/test_indices.pt')
111
+
112
+ # text_prompt = torch.load(f'{hydra_path}/graphs/{domain}/text_prompt_order.pt')
113
+
114
+ with open(f'{hydra_path}/graphs/{domain}/text_prompt_order.txt', 'r') as f:
115
+ text_prompt = f.readlines()
116
+ text_prompt = [x.strip() for x in text_prompt]
117
+
118
+ print(domain, text_prompt[0])
119
+
120
+ text_embs = model.encode(text_prompt)
121
+
122
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
123
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
124
+ test_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)])
125
+ print(i, domain, len(train_data), len(val_data), len(test_data))
126
+
127
+ print('Size of dataset', len(train_data), len(val_data), len(test_data))
128
+
129
+ train_loader = DataLoader(train_data, batch_size = batch_size, shuffle=True)
130
+ val_loader = DataLoader(val_data, batch_size = batch_size, shuffle=False)
131
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
132
+
133
+ return train_loader, val_loader, test_loader, train_data, val_data, test_data, text_embs.shape[1], torch.tensor(cond_embs)
134
+
135
+
136
+
137
+
138
+ def load_dataset_deg(dataname, batch_size, hydra_path, condition):
139
+ domains = ['deg_high', 'deg_medium', 'deg_low']
140
+
141
+
142
+ model = SentenceTransformer("all-MiniLM-L6-v2")
143
+ cond_embs = model.encode(condition)
144
+
145
+ for domain in domains:
146
+ if not os.path.exists(f'{hydra_path}/graphs/{domain}/train.pt'):
147
+
148
+ data = torch.load(f'{hydra_path}/graphs/{domain}/{domain}.pt')
149
+
150
+ #fix seed
151
+ torch.manual_seed(0)
152
+
153
+ #random permute and split
154
+ n = len(data)
155
+ indices = torch.randperm(n)
156
+
157
+ if domain == 'eco':
158
+ train_indices = indices[:4].repeat(50)
159
+ val_indices = indices[4:5].repeat(50)
160
+ test_indices = indices[5:]
161
+ else:
162
+ train_indices = indices[:int(0.7 * n)]
163
+ val_indices = indices[int(0.7 * n):int(0.8 * n)]
164
+ test_indices = indices[int(0.8 * n):]
165
+
166
+ train_data = [data[_] for _ in train_indices]
167
+ val_data = [data[_] for _ in val_indices]
168
+ test_data = [data[_] for _ in test_indices]
169
+
170
+ torch.save(train_indices, f'{hydra_path}/graphs/{domain}/train_indices.pt')
171
+ torch.save(val_indices, f'{hydra_path}/graphs/{domain}/val_indices.pt')
172
+ torch.save(test_indices, f'{hydra_path}/graphs/{domain}/test_indices.pt')
173
+
174
+ torch.save(train_data, f'{hydra_path}/graphs/{domain}/train.pt')
175
+ torch.save(val_data, f'{hydra_path}/graphs/{domain}/val.pt')
176
+ torch.save(test_data, f'{hydra_path}/graphs/{domain}/test.pt')
177
+
178
+
179
+ train_data, val_data, test_data = [], [], []
180
+
181
+ if dataname in domains: #only for test
182
+ train_d = torch.load(f'{hydra_path}/graphs/{dataname}/train.pt')
183
+ val_d = torch.load(f'{hydra_path}/graphs/{dataname}/val.pt')
184
+ test_d = torch.load(f'{hydra_path}/graphs/{dataname}/test.pt')
185
+
186
+ train_indices = torch.load(f'{hydra_path}/graphs/{dataname}/train_indices.pt')
187
+ val_indices = torch.load(f'{hydra_path}/graphs/{dataname}/val_indices.pt')
188
+ test_indices = torch.load(f'{hydra_path}/graphs/{dataname}/test_indices.pt')
189
+
190
+ with open(f'{hydra_path}/graphs/{dataname}/text_prompt_order.txt', 'r') as f:
191
+ text_prompt = f.readlines()
192
+ text_prompt = [x.strip() for x in text_prompt]
193
+
194
+
195
+ text_embs = model.encode(text_prompt)
196
+ cond_embs = torch.tensor(text_embs)
197
+
198
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
199
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
200
+
201
+
202
+ if dataname != 'eco':
203
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
204
+ else:
205
+ test_data = [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)] + [arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)]
206
+
207
+
208
+ elif dataname == 'all':
209
+ for i, domain in enumerate(domains):
210
+ train_d = torch.load(f'{hydra_path}/graphs/{domain}/train.pt')
211
+ val_d = torch.load(f'{hydra_path}/graphs/{domain}/val.pt')
212
+ test_d = torch.load(f'{hydra_path}/graphs/{domain}/test.pt')
213
+
214
+ train_indices = torch.load(f'{hydra_path}/graphs/{domain}/train_indices.pt')
215
+ val_indices = torch.load(f'{hydra_path}/graphs/{domain}/val_indices.pt')
216
+ test_indices = torch.load(f'{hydra_path}/graphs/{domain}/test_indices.pt')
217
+
218
+ # text_prompt = torch.load(f'{hydra_path}/graphs/{domain}/text_prompt_order.pt')
219
+
220
+ with open(f'{hydra_path}/graphs/{domain}/text_prompt_order.txt', 'r') as f:
221
+ text_prompt = f.readlines()
222
+ text_prompt = [x.strip() for x in text_prompt]
223
+
224
+ print(domain, text_prompt[0])
225
+
226
+ text_embs = model.encode(text_prompt)
227
+
228
+ train_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(train_d, train_indices)])
229
+ val_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(val_d, val_indices)])
230
+ test_data.extend([arrange_data(d, text_embs[ind.item()], ind.item()) for d, ind in zip(test_d, test_indices)])
231
+ print(i, domain, len(train_data), len(val_data), len(test_data))
232
+
233
+ print('Size of dataset', len(train_data), len(val_data), len(test_data))
234
+
235
+ train_loader = DataLoader(train_data, batch_size = batch_size, shuffle=True)
236
+ val_loader = DataLoader(val_data, batch_size = batch_size, shuffle=False)
237
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
238
+
239
+ return train_loader, val_loader, test_loader, train_data, val_data, test_data, text_embs.shape[1], torch.tensor(cond_embs)
240
+
241
+
242
+
243
+
244
+ def init_dataset(dataname, batch_size, hydra_path, condition, transition):
245
+ train_loader, val_loader, test_loader, train_data, val_data, test_data, cond_dims, cond_emb = load_dataset_cc(dataname, batch_size, hydra_path, condition)
246
+
247
+ n_nodes = node_counts(1000, train_loader, val_loader)
248
+ node_types = torch.tensor([1]) #No node types
249
+ edge_types = edge_counts(train_loader)
250
+
251
+ num_classes = len(node_types)
252
+ max_n_nodes = len(n_nodes) - 1
253
+ nodes_dist = DistributionNodes(n_nodes)
254
+
255
+ print('Distribution of Number of Nodes:', n_nodes)
256
+ print('Distribution of Node Types:', node_types)
257
+ print('Distribution of Edge Types:', edge_types)
258
+
259
+ data_loaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
260
+
261
+ return data_loaders, num_classes, max_n_nodes, nodes_dist, edge_types, node_types, n_nodes, cond_dims, cond_emb
262
+
263
+
264
+ def node_counts(max_nodes_possible, train_loader, val_loader):
265
+ #Count the distribution of graph size
266
+ all_counts = torch.zeros(max_nodes_possible)
267
+
268
+ for loader in [train_loader, val_loader]:
269
+ for data in loader:
270
+ unique, counts = torch.unique(data.batch, return_counts=True)
271
+ for count in counts:
272
+ all_counts[count] += 1
273
+
274
+ max_index = max(all_counts.nonzero())
275
+ all_counts = all_counts[:max_index + 1]
276
+ all_counts = all_counts / all_counts.sum()
277
+
278
+ return all_counts
279
+
280
+ def node_counts_meta(max_nodes_possible, train_data, val_data, num_classes):
281
+ #Count the distribution of graph size
282
+
283
+ all_counts = [torch.zeros(max_nodes_possible) for _ in range(num_classes)]
284
+
285
+ for dataset in [train_data, val_data]:
286
+ for data in dataset:
287
+ all_counts[data.cond_type.item()][data.x.shape[0]] += 1
288
+
289
+ for _ in range(num_classes):
290
+ tmp = all_counts[_].nonzero()
291
+ if len(tmp) == 0:
292
+ max_index = 1
293
+ all_counts[_][0] = 1
294
+ else:
295
+ max_index = max(tmp)
296
+
297
+ all_counts[_] = all_counts[_][:max_index + 1]
298
+ all_counts[_] = all_counts[_] / all_counts[_].sum()
299
+
300
+ return all_counts
301
+
302
+
303
+ def node_types(train_loader):
304
+ #Count the marginal distribution of node types
305
+ num_classes = None
306
+ for data in train_loader:
307
+ num_classes = data.x.shape[1]
308
+ break
309
+
310
+ counts = torch.zeros(num_classes)
311
+
312
+ for i, data in enumerate(train_loader):
313
+ counts += data.x.sum(dim=0)
314
+
315
+ counts = counts / counts.sum()
316
+ return counts
317
+
318
+ def edge_counts(train_loader):
319
+ #Count the marginal distribution of edge types
320
+ num_classes = None
321
+ for data in train_loader:
322
+ num_classes = data.edge_attr.shape[1]
323
+ break
324
+
325
+ d = torch.zeros(num_classes, dtype=torch.float)
326
+
327
+ for i, data in enumerate(train_loader):
328
+ unique, counts = torch.unique(data.batch, return_counts=True)
329
+
330
+ all_pairs = 0
331
+ for count in counts:
332
+ all_pairs += count * (count - 1)
333
+
334
+
335
+ num_edges = data.edge_index.shape[1]
336
+ num_non_edges = all_pairs - num_edges
337
+
338
+ edge_types = data.edge_attr.sum(dim=0)
339
+ assert num_non_edges >= 0
340
+ d[0] += num_non_edges
341
+ d[1:] += edge_types[1:]
342
+
343
+ d = d / d.sum()
344
+ return d
345
+
346
+
347
+ def edge_counts_meta(train_data, num_classes):
348
+ #Count the marginal distribution of edge types
349
+ num_edge_classes = None
350
+ for data in train_data:
351
+ num_edge_classes = data.edge_attr.shape[1]
352
+ break
353
+
354
+ d = [torch.ones(num_edge_classes, dtype=torch.float) for _ in range(num_classes)]
355
+
356
+ for i, data in enumerate(train_data):
357
+ n_nodes = data.x.shape[0]
358
+
359
+ all_pairs = n_nodes * (n_nodes - 1)
360
+ num_edges = data.edge_index.shape[1]
361
+ num_non_edges = all_pairs - num_edges
362
+
363
+ edge_types = data.edge_attr.sum(dim=0)
364
+ assert num_non_edges >= 0
365
+ d[data.cond_type.item()][0] += num_non_edges
366
+ d[data.cond_type.item()][1:] += edge_types[1:]
367
+
368
+ for i, _ in enumerate(d):
369
+ d[i] = d[i] / d[i].sum()
370
+
371
+ d = torch.stack(d)
372
+
373
+ return d
374
+
375
+
376
+ def compute_input_output_dims(train_loader, extra_features):
377
+ example_batch = next(iter(train_loader))
378
+ ex_dense, node_mask = to_dense(example_batch.x, example_batch.edge_index, example_batch.edge_attr, example_batch.batch)
379
+
380
+ example_data = {'X_t': ex_dense.X, 'E_t': ex_dense.E, 'y_t': example_batch['y'], 'node_mask': node_mask}
381
+
382
+ input_dims = {'X': example_batch['x'].size(1),
383
+ 'E': example_batch['edge_attr'].size(1),
384
+ 'y': example_batch['y'].size(1) + 1} # + 1 due to time conditioning
385
+
386
+ ex_extra_feat = extra_features(example_data)
387
+ input_dims['X'] += ex_extra_feat.X.size(-1)
388
+ input_dims['E'] += ex_extra_feat.E.size(-1)
389
+ input_dims['y'] += ex_extra_feat.y.size(-1)
390
+
391
+ output_dims = {'X': example_batch['x'].size(1),
392
+ 'E': example_batch['edge_attr'].size(1),
393
+ 'y': 0}
394
+
395
+ return input_dims, output_dims
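As a worked example of the edge marginals computed by edge_counts above (illustrative numbers only, not taken from the dataset): a graph with n nodes has n*(n-1) ordered node pairs; pairs without an edge contribute to class 0 and the remaining mass goes to the edge classes.

import torch

# hypothetical graph: 10 nodes, 18 undirected edges stored in both directions
n_nodes, num_directed_edges = 10, 36
all_pairs = n_nodes * (n_nodes - 1)            # 90 ordered pairs
num_non_edges = all_pairs - num_directed_edges

d = torch.tensor([num_non_edges, num_directed_edges], dtype=torch.float)
print(d / d.sum())                             # tensor([0.6000, 0.4000])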
demo_model.py ADDED
@@ -0,0 +1,214 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from tqdm import tqdm
5
+
6
+ from models.transformer_model import GraphTransformer
7
+ from diffusion.noise_schedule import DiscreteUniformTransition, PredefinedNoiseScheduleDiscrete
8
+ from diffusion import diffusion_utils
9
+ import utils
10
+ import networkx as nx
11
+ from sentence_transformers import SentenceTransformer
12
+ import pytorch_lightning as pl
13
+
14
+
15
+ class LGGMText2Graph_Demo(pl.LightningModule):
16
+ def __init__(self, cfg, input_dims, output_dims, cond_dims, cond_emb, \
17
+ nodes_dist, node_types, edge_types, extra_features, data_loaders):
18
+ super().__init__()
19
+
20
+ nodes_dist = nodes_dist
21
+
22
+ self.cfg = cfg
23
+ self.T = cfg.model.diffusion_steps
24
+
25
+ self.Xdim = input_dims['X']
26
+ self.Edim = input_dims['E']
27
+ self.ydim = input_dims['y']
28
+ self.Xdim_output = output_dims['X']
29
+ self.Edim_output = output_dims['E']
30
+ self.ydim_output = output_dims['y']
31
+ self.node_dist = nodes_dist
32
+
33
+
34
+ self.extra_features = extra_features
35
+
36
+ self.model = GraphTransformer(n_layers=cfg.model.n_layers,
37
+ input_dims=input_dims,
38
+ hidden_mlp_dims=cfg.model.hidden_mlp_dims,
39
+ hidden_dims=cfg.model.hidden_dims,
40
+ output_dims=output_dims,
41
+ cond_dims = cond_dims,
42
+ act_fn_in=nn.ReLU(),
43
+ act_fn_out=nn.ReLU()).to(self.device)
44
+
45
+
46
+ self.noise_schedule = PredefinedNoiseScheduleDiscrete(cfg.model.diffusion_noise_schedule,
47
+ timesteps=cfg.model.diffusion_steps).to(self.device)
48
+
49
+ self.transition_model = DiscreteUniformTransition(x_classes=self.Xdim_output, e_classes=self.Edim_output,
50
+ y_classes=self.ydim_output)
51
+ x_limit = torch.ones(self.Xdim_output) / self.Xdim_output
52
+ e_limit = torch.ones(self.Edim_output) / self.Edim_output
53
+ y_limit = torch.ones(self.ydim_output) / self.ydim_output
54
+
55
+ self.limit_dist = utils.PlaceHolder(X=x_limit, E=e_limit, y=y_limit)
56
+
57
+
58
+ def generate(self, text, num_nodes) -> None:
59
+ print(num_nodes)
60
+ prompt_emb = torch.tensor(self.text_encoder.encode([text])).to(self.device)
61
+ samples = self.sample_batch(5, cond_emb = prompt_emb, num_nodes = num_nodes)
62
+
63
+ nx_graphs = []
64
+ for graph in samples:
65
+ node_types, edge_types = graph
66
+ A = edge_types.bool().cpu().numpy()
67
+
68
+ nx_graph = nx.from_numpy_array(A)
69
+ nx_graphs.append(nx_graph)
70
+
71
+ return nx_graphs
72
+
73
+ def init_prompt_encoder(self):
74
+ self.text_encoder = SentenceTransformer("all-MiniLM-L6-v2")
75
+
76
+
77
+ @torch.no_grad()
78
+ def sample_batch(self, batch_size: int, cond_emb = None, num_nodes = None):
79
+ """
80
+ :param batch_size: int: number of graphs to sample
81
+ :param cond_emb: encoded text-prompt embedding used to condition
82
+ the reverse diffusion (optional)
83
+ :param num_nodes: int, <int>tensor (batch_size) (optional) for
84
+ specifying number of nodes
85
+ :return: graph_list. Each element of this list is a pair
86
+ [node_types, edge_types] describing one sampled graph
87
+ """
88
+ if num_nodes is None:
89
+ n_nodes = self.node_dist.sample_n(batch_size, self.device)
90
+ elif type(num_nodes) == int:
91
+ n_nodes = num_nodes * torch.ones(batch_size, device=self.device, dtype=torch.int)
92
+
93
+ n_max = torch.max(n_nodes).item()
94
+ # Build the masks
95
+ arange = torch.arange(n_max, device=self.device).unsqueeze(0).expand(batch_size, -1)
96
+ node_mask = arange < n_nodes.unsqueeze(1)
97
+ # Sample noise -- z has size (n_samples, n_nodes, n_features)
98
+
99
+ z_T = diffusion_utils.sample_discrete_feature_noise(limit_dist=self.limit_dist, node_mask=node_mask, transition=self.cfg.model.transition)
100
+ X, E, y = z_T.X, z_T.E, z_T.y
101
+
102
+
103
+ # Iteratively sample p(z_s | z_t) for t = 1, ..., T, with s = t - 1.
104
+ for s_int in tqdm(reversed(range(0, self.T))):
105
+ s_array = s_int * torch.ones((batch_size, 1)).type_as(y)
106
+ t_array = s_array + 1
107
+ s_norm = s_array / self.T
108
+ t_norm = t_array / self.T
109
+
110
+ # Sample z_s
111
+ sampled_s = self.sample_p_zs_given_zt(s_norm, t_norm, X, E, y, node_mask, cond_emb)
112
+ X, E, y = sampled_s.X, sampled_s.E, sampled_s.y
113
+
114
+ # Sample
115
+ sampled_s = sampled_s.mask(node_mask, collapse=True)
116
+ X, E, y = sampled_s.X, sampled_s.E, sampled_s.y
117
+
118
+
119
+ graph_list = []
120
+ for i in range(batch_size):
121
+ n = n_nodes[i]
122
+ node_types = X[i, :n].cpu()
123
+ edge_types = E[i, :n, :n].cpu()
124
+ graph_list.append([node_types, edge_types])
125
+
126
+ return graph_list
127
+
128
+ def sample_p_zs_given_zt(self, s, t, X_t, E_t, y_t, node_mask, cond_emb):
129
+ """Samples from zs ~ p(zs | zt). Only used during sampling.
130
+ if last_step, return the graph prediction as well"""
131
+ bs, n, dxs = X_t.shape
132
+ beta_t = self.noise_schedule(t_normalized=t) # (bs, 1)
133
+ alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s)
134
+ alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t)
135
+
136
+
137
+ # Retrieve transitions matrix
138
+ Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, self.device)
139
+ Qsb = self.transition_model.get_Qt_bar(alpha_s_bar, self.device)
140
+ Qt = self.transition_model.get_Qt(beta_t, self.device)
141
+
142
+ noisy_data = {'X_t': X_t, 'E_t': E_t, 'y_t': y_t, 't': t, 'node_mask': node_mask, 'cond_emb': cond_emb.repeat(X_t.shape[0], 1)}
143
+ extra_data = self.compute_extra_data(noisy_data)
144
+ pred = self.forward(noisy_data, extra_data, node_mask)
145
+
146
+ # Normalize predictions
147
+ pred_X = F.softmax(pred.X, dim=-1) # bs, n, d0
148
+ pred_E = F.softmax(pred.E, dim=-1) # bs, n, n, d0
149
+
150
+ p_s_and_t_given_0_X = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=X_t,
151
+ Qt=Qt.X,
152
+ Qsb=Qsb.X,
153
+ Qtb=Qtb.X)
154
+
155
+ p_s_and_t_given_0_E = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=E_t,
156
+ Qt=Qt.E,
157
+ Qsb=Qsb.E,
158
+ Qtb=Qtb.E)
159
+ # Dim of these two tensors: bs, N, d0, d_t-1
160
+ weighted_X = pred_X.unsqueeze(-1) * p_s_and_t_given_0_X # bs, n, d0, d_t-1
161
+ unnormalized_prob_X = weighted_X.sum(dim=2) # bs, n, d_t-1
162
+ unnormalized_prob_X[torch.sum(unnormalized_prob_X, dim=-1) == 0] = 1e-5
163
+ prob_X = unnormalized_prob_X / torch.sum(unnormalized_prob_X, dim=-1, keepdim=True) # bs, n, d_t-1
164
+
165
+ pred_E = pred_E.reshape((bs, -1, pred_E.shape[-1]))
166
+ weighted_E = pred_E.unsqueeze(-1) * p_s_and_t_given_0_E # bs, N, d0, d_t-1
167
+ unnormalized_prob_E = weighted_E.sum(dim=-2)
168
+ unnormalized_prob_E[torch.sum(unnormalized_prob_E, dim=-1) == 0] = 1e-5
169
+ prob_E = unnormalized_prob_E / torch.sum(unnormalized_prob_E, dim=-1, keepdim=True)
170
+ prob_E = prob_E.reshape(bs, n, n, pred_E.shape[-1])
171
+
172
+ assert ((prob_X.sum(dim=-1) - 1).abs() < 1e-4).all()
173
+ assert ((prob_E.sum(dim=-1) - 1).abs() < 1e-4).all()
174
+
175
+
176
+ sampled_s = diffusion_utils.sample_discrete_features(prob_X, prob_E, node_mask=node_mask)
177
+
178
+
179
+ X_s = F.one_hot(sampled_s.X, num_classes=self.Xdim_output).float()
180
+ E_s = F.one_hot(sampled_s.E, num_classes=self.Edim_output).float()
181
+
182
+ assert (E_s == torch.transpose(E_s, 1, 2)).all()
183
+ assert (X_t.shape == X_s.shape) and (E_t.shape == E_s.shape)
184
+
185
+ out_one_hot = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0))
186
+
187
+ return out_one_hot.mask(node_mask).type_as(y_t)
188
+
189
+ def compute_extra_data(self, noisy_data):
190
+ """ At every training step (after adding noise) and step in sampling, compute extra information and append to
191
+ the network input. """
192
+
193
+ extra_features = self.extra_features(noisy_data)
194
+
195
+ # print(extra_features.X.shape, extra_features.E.shape, extra_features.y.shape)
196
+ extra_X = extra_features.X
197
+ extra_E = extra_features.E
198
+ extra_y = extra_features.y
199
+
200
+ t = noisy_data['t']
201
+ extra_y = torch.cat((extra_y, t), dim=1)
202
+
203
+ return utils.PlaceHolder(X=extra_X, E=extra_E, y=extra_y)
204
+
205
+ def forward(self, noisy_data, extra_data, node_mask):
206
+ # print(noisy_data['cond_emb'].sum())
207
+ B = noisy_data['cond_emb'].unsqueeze(1).unsqueeze(2).expand(-1, noisy_data['X_t'].shape[1], noisy_data['X_t'].shape[1], -1).to(self.device)
208
+ A = noisy_data['cond_emb'].unsqueeze(1).expand(-1, noisy_data['X_t'].shape[1], -1).to(self.device)
209
+
210
+ X = torch.cat((noisy_data['X_t'], extra_data.X, A), dim=2).float()
211
+ E = torch.cat((noisy_data['E_t'], extra_data.E, B), dim=3).float()
212
+ y = torch.hstack((noisy_data['y_t'], extra_data.y)).float()
213
+
214
+ return self.model(X, E, y, node_mask)
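For intuition on the weighting in sample_p_zs_given_zt above: the network's prediction over the clean state is combined with the categorical posterior, which is proportional to (z_t @ Qt.T) * (z_0 @ Qsb) and then normalized over classes. A toy two-class sketch of that computation (hypothetical transition matrices, not the model's actual noise schedule):

import torch

Qt  = torch.tensor([[0.9, 0.1], [0.1, 0.9]])    # single-step transition q(z_t | z_s)
Qsb = torch.tensor([[0.8, 0.2], [0.2, 0.8]])    # cumulative q(z_s | z_0)
Qtb = Qsb @ Qt                                  # cumulative q(z_t | z_0)

z_t   = torch.tensor([1.0, 0.0])                # observed noisy state (one-hot)
pred0 = torch.tensor([0.3, 0.7])                # network's softmax over z_0

post_per_x0 = (z_t @ Qt.T).unsqueeze(0) * Qsb              # numerator, one row per value of z_0
post_per_x0 = post_per_x0 / (Qtb @ z_t).unsqueeze(-1)      # divide by q(z_t | z_0)
prob_zs = pred0 @ post_per_x0                              # weight by the prediction over z_0
print(prob_zs / prob_zs.sum())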
diffusion/__init__.py ADDED
File without changes
diffusion/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (166 Bytes). View file
 
diffusion/__pycache__/diffusion_utils.cpython-39.pyc ADDED
Binary file (13 kB). View file
 
diffusion/__pycache__/noise_schedule.cpython-39.pyc ADDED
Binary file (7.85 kB). View file
 
diffusion/diffusion_utils.py ADDED
@@ -0,0 +1,437 @@
1
+ import torch
2
+ from torch.nn import functional as F
3
+ import numpy as np
4
+ import math
5
+ import wandb, omegaconf  # used by setup_wandb below
6
+
7
+
8
+ class PlaceHolder:
9
+ def __init__(self, X, E, y):
10
+ self.X = X
11
+ self.E = E
12
+ self.y = y
13
+
14
+ def type_as(self, x: torch.Tensor):
15
+ """ Changes the device and dtype of X, E, y. """
16
+ self.X = self.X.type_as(x)
17
+ self.E = self.E.type_as(x)
18
+ self.y = self.y.type_as(x)
19
+ return self
20
+
21
+ def mask(self, node_mask, collapse=False):
22
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
23
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
24
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
25
+
26
+ if collapse:
27
+ self.X = torch.argmax(self.X, dim=-1)
28
+ self.E = torch.argmax(self.E, dim=-1)
29
+
30
+ self.X[node_mask == 0] = - 1
31
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
32
+ else:
33
+ self.X = self.X * x_mask
34
+ self.E = self.E * e_mask1 * e_mask2
35
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
36
+ return self
37
+
38
+ def setup_wandb(cfg):
39
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
40
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
41
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
42
+ wandb.init(**kwargs)
43
+ wandb.save('*.txt')
44
+
45
+
46
+ def sum_except_batch(x):
47
+ return x.reshape(x.size(0), -1).sum(dim=-1)
48
+
49
+
50
+ def assert_correctly_masked(variable, node_mask):
51
+ assert (variable * (1 - node_mask.long())).abs().max().item() < 1e-4, \
52
+ 'Variables not masked properly.'
53
+
54
+
55
+ def sample_gaussian(size):
56
+ x = torch.randn(size)
57
+ return x
58
+
59
+
60
+ def sample_gaussian_with_mask(size, node_mask):
61
+ x = torch.randn(size)
62
+ x = x.type_as(node_mask.float())
63
+ x_masked = x * node_mask
64
+ return x_masked
65
+
66
+
67
+ def clip_noise_schedule(alphas2, clip_value=0.001):
68
+ """
69
+ For a noise schedule given by alpha^2, this clips alpha_t / alpha_t-1. This may help improve stability during
70
+ sampling.
71
+ """
72
+ alphas2 = np.concatenate([np.ones(1), alphas2], axis=0)
73
+
74
+ alphas_step = (alphas2[1:] / alphas2[:-1])
75
+
76
+ alphas_step = np.clip(alphas_step, a_min=clip_value, a_max=1.)
77
+ alphas2 = np.cumprod(alphas_step, axis=0)
78
+
79
+ return alphas2
80
+
81
+
82
+ def cosine_beta_schedule(timesteps, s=0.008, raise_to_power: float = 1):
83
+ """
84
+ cosine schedule
85
+ as proposed in https://openreview.net/forum?id=-NEXDKk8gZ
86
+ """
87
+ steps = timesteps + 2
88
+ x = np.linspace(0, steps, steps)
89
+ alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2
90
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
91
+ betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
92
+ betas = np.clip(betas, a_min=0, a_max=0.999)
93
+ alphas = 1. - betas
94
+ alphas_cumprod = np.cumprod(alphas, axis=0)
95
+
96
+ if raise_to_power != 1:
97
+ alphas_cumprod = np.power(alphas_cumprod, raise_to_power)
98
+
99
+ return alphas_cumprod
100
+
101
+
102
+ def cosine_beta_schedule_discrete(timesteps, s=0.008):
103
+ """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. """
104
+ steps = timesteps + 2
105
+ x = np.linspace(0, steps, steps)
106
+
107
+ alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2
108
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
109
+ alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1])
110
+ betas = 1 - alphas
111
+ return betas.squeeze()
112
+
113
+
114
+ def custom_beta_schedule_discrete(timesteps, average_num_nodes=50, s=0.008):
115
+ """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. """
116
+ steps = timesteps + 2
117
+ x = np.linspace(0, steps, steps)
118
+
119
+ alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2
120
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
121
+ alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1])
122
+ betas = 1 - alphas
123
+
124
+ assert timesteps >= 100
125
+
126
+ p = 4 / 5 # 1 - 1 / num_edge_classes
127
+ num_edges = average_num_nodes * (average_num_nodes - 1) / 2
128
+
129
+ # First 100 steps: only a few updates per graph
130
+ updates_per_graph = 1.2
131
+ beta_first = updates_per_graph / (p * num_edges)
132
+
133
+ betas[betas < beta_first] = beta_first
134
+ return np.array(betas)
135
+
136
+
137
+
138
+ def gaussian_KL(q_mu, q_sigma):
139
+ """Computes the KL distance between a normal distribution and the standard normal.
140
+ Args:
141
+ q_mu: Mean of distribution q.
142
+ q_sigma: Standard deviation of distribution q.
143
+ p_mu: Mean of distribution p.
144
+ p_sigma: Standard deviation of distribution p.
145
+ Returns:
146
+ The KL distance, summed over all dimensions except the batch dim.
147
+ """
148
+ return sum_except_batch((torch.log(1 / q_sigma) + 0.5 * (q_sigma ** 2 + q_mu ** 2) - 0.5))
149
+
150
+
151
+ def cdf_std_gaussian(x):
152
+ return 0.5 * (1. + torch.erf(x / math.sqrt(2)))
153
+
154
+
155
+ def SNR(gamma):
156
+ """Computes signal to noise ratio (alpha^2/sigma^2) given gamma."""
157
+ return torch.exp(-gamma)
158
+
159
+
160
+ def inflate_batch_array(array, target_shape):
161
+ """
162
+ Inflates the batch array (array) with only a single axis (i.e. shape = (batch_size,), or possibly more empty
163
+ axes (i.e. shape (batch_size, 1, ..., 1)) to match the target shape.
164
+ """
165
+ target_shape = (array.size(0),) + (1,) * (len(target_shape) - 1)
166
+ return array.view(target_shape)
167
+
168
+
169
+ def sigma(gamma, target_shape):
170
+ """Computes sigma given gamma."""
171
+ return inflate_batch_array(torch.sqrt(torch.sigmoid(gamma)), target_shape)
172
+
173
+
174
+ def alpha(gamma, target_shape):
175
+ """Computes alpha given gamma."""
176
+ return inflate_batch_array(torch.sqrt(torch.sigmoid(-gamma)), target_shape)
177
+
178
+
179
+ def check_mask_correct(variables, node_mask):
180
+ for i, variable in enumerate(variables):
181
+ if len(variable) > 0:
182
+ assert_correctly_masked(variable, node_mask)
183
+
184
+
185
+ def check_tensor_same_size(*args):
186
+ for i, arg in enumerate(args):
187
+ if i == 0:
188
+ continue
189
+ assert args[0].size() == arg.size()
190
+
191
+
192
+ def sigma_and_alpha_t_given_s(gamma_t: torch.Tensor, gamma_s: torch.Tensor, target_size: torch.Size):
193
+ """
194
+ Computes sigma t given s, using gamma_t and gamma_s. Used during sampling.
195
+
196
+ These are defined as:
197
+ alpha t given s = alpha t / alpha s,
198
+ sigma t given s = sqrt(1 - (alpha t given s) ^2 ).
199
+ """
200
+ sigma2_t_given_s = inflate_batch_array(
201
+ -torch.expm1(F.softplus(gamma_s) - F.softplus(gamma_t)), target_size
202
+ )
203
+
204
+ # alpha_t_given_s = alpha_t / alpha_s
205
+ log_alpha2_t = F.logsigmoid(-gamma_t)
206
+ log_alpha2_s = F.logsigmoid(-gamma_s)
207
+ log_alpha2_t_given_s = log_alpha2_t - log_alpha2_s
208
+
209
+ alpha_t_given_s = torch.exp(0.5 * log_alpha2_t_given_s)
210
+ alpha_t_given_s = inflate_batch_array(alpha_t_given_s, target_size)
211
+
212
+ sigma_t_given_s = torch.sqrt(sigma2_t_given_s)
213
+
214
+ return sigma2_t_given_s, sigma_t_given_s, alpha_t_given_s
215
+
216
+
217
+ def reverse_tensor(x):
218
+ return x[torch.arange(x.size(0) - 1, -1, -1)]
219
+
220
+
221
+ def sample_feature_noise(X_size, E_size, y_size, node_mask):
222
+ """Standard normal noise for all features.
223
+ Output size: X.size(), E.size(), y.size() """
224
+ # TODO: How to change this for the multi-gpu case?
225
+ epsX = sample_gaussian(X_size)
226
+ epsE = sample_gaussian(E_size)
227
+ epsy = sample_gaussian(y_size)
228
+
229
+ float_mask = node_mask.float()
230
+ epsX = epsX.type_as(float_mask)
231
+ epsE = epsE.type_as(float_mask)
232
+ epsy = epsy.type_as(float_mask)
233
+
234
+ # Get upper triangular part of edge noise, without main diagonal
235
+ upper_triangular_mask = torch.zeros_like(epsE)
236
+ indices = torch.triu_indices(row=epsE.size(1), col=epsE.size(2), offset=1)
237
+ upper_triangular_mask[:, indices[0], indices[1], :] = 1
238
+
239
+ epsE = epsE * upper_triangular_mask
240
+ epsE = (epsE + torch.transpose(epsE, 1, 2))
241
+
242
+ assert (epsE == torch.transpose(epsE, 1, 2)).all()
243
+
244
+ return PlaceHolder(X=epsX, E=epsE, y=epsy).mask(node_mask)
245
+
246
+
247
+ def sample_normal(mu_X, mu_E, mu_y, sigma, node_mask):
248
+ """Samples from a Normal distribution."""
249
+ # TODO: change for multi-gpu case
250
+ eps = sample_feature_noise(mu_X.size(), mu_E.size(), mu_y.size(), node_mask).type_as(mu_X)
251
+ X = mu_X + sigma * eps.X
252
+ E = mu_E + sigma.unsqueeze(1) * eps.E
253
+ y = mu_y + sigma.squeeze(1) * eps.y
254
+ return PlaceHolder(X=X, E=E, y=y)
255
+
256
+
257
+ def check_issues_norm_values(gamma, norm_val1, norm_val2, num_stdevs=8):
258
+ """ Check if 1 / norm_value is still larger than 10 * standard deviation. """
259
+ zeros = torch.zeros((1, 1))
260
+ gamma_0 = gamma(zeros)
261
+ sigma_0 = sigma(gamma_0, target_shape=zeros.size()).item()
262
+ max_norm_value = max(norm_val1, norm_val2)
263
+ if sigma_0 * num_stdevs > 1. / max_norm_value:
264
+ raise ValueError(
265
+ f'Value for normalization value {max_norm_value} probably too '
266
+ f'large with sigma_0 {sigma_0:.5f} and '
267
+ f'1 / norm_value = {1. / max_norm_value}')
268
+
269
+
270
+ def sample_discrete_features(probX, probE, node_mask):
271
+ ''' Sample features from multinomial distribution with given probabilities (probX, probE, proby)
272
+ :param probX: bs, n, dx_out node features
273
+ :param probE: bs, n, n, de_out edge features
274
+ :param proby: bs, dy_out global features.
275
+ '''
276
+ bs, n, _ = probX.shape
277
+ # Noise X
278
+ # The masked rows should define probability distributions as well
279
+ probX[~node_mask] = 1 / probX.shape[-1]
280
+
281
+ # Flatten the probability tensor to sample with multinomial
282
+ probX = probX.reshape(bs * n, -1) # (bs * n, dx_out)
283
+
284
+ # Sample X
285
+ X_t = probX.multinomial(1) # (bs * n, 1)
286
+ X_t = X_t.reshape(bs, n) # (bs, n)
287
+
288
+ # Noise E
289
+ # The masked rows should define probability distributions as well
290
+ inverse_edge_mask = ~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2))
291
+ diag_mask = torch.eye(n).unsqueeze(0).expand(bs, -1, -1)
292
+
293
+ probE[inverse_edge_mask] = 1 / probE.shape[-1]
294
+ probE[diag_mask.bool()] = 1 / probE.shape[-1]
295
+
296
+ probE = probE.reshape(bs * n * n, -1) # (bs * n * n, de_out)
297
+
298
+ # Sample E
299
+ E_t = probE.multinomial(1).reshape(bs, n, n) # (bs, n, n)
300
+ E_t = torch.triu(E_t, diagonal=1)
301
+ E_t = (E_t + torch.transpose(E_t, 1, 2))
302
+
303
+ return PlaceHolder(X=X_t, E=E_t, y=torch.zeros(bs, 0).type_as(X_t))
304
+
305
+
306
+ def compute_posterior_distribution(M, M_t, Qt_M, Qsb_M, Qtb_M):
307
+ ''' M: X or E
308
+ Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T
309
+ '''
310
+ # Flatten feature tensors
311
+ M = M.flatten(start_dim=1, end_dim=-2).to(torch.float32) # (bs, N, d) with N = n or n * n
312
+ M_t = M_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # same
313
+
314
+ Qt_M_T = torch.transpose(Qt_M, -2, -1) # (bs, d, d)
315
+
316
+ left_term = M_t @ Qt_M_T # (bs, N, d)
317
+ right_term = M @ Qsb_M # (bs, N, d)
318
+ product = left_term * right_term # (bs, N, d)
319
+
320
+ denom = M @ Qtb_M # (bs, N, d) @ (bs, d, d) = (bs, N, d)
321
+ denom = (denom * M_t).sum(dim=-1) # (bs, N, d) * (bs, N, d) + sum = (bs, N)
322
+ # denom = product.sum(dim=-1)
323
+ # denom[denom == 0.] = 1
324
+
325
+ prob = product / denom.unsqueeze(-1) # (bs, N, d)
326
+
327
+ return prob
328
+
329
+
330
+ def compute_batched_over0_posterior_distribution(X_t, Qt, Qsb, Qtb):
331
+ """ M: X or E
332
+ Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T for each possible value of x0
333
+ X_t: bs, n, dt or bs, n, n, dt
334
+ Qt: bs, d_t-1, dt
335
+ Qsb: bs, d0, d_t-1
336
+ Qtb: bs, d0, dt.
337
+ """
338
+ # Flatten feature tensors
339
+ # Careful with this line. It does nothing if X is a node feature. If X is an edge features it maps to
340
+ # bs x (n ** 2) x d
341
+ X_t = X_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # bs x N x dt
342
+
343
+ Qt_T = Qt.transpose(-1, -2) # bs, dt, d_t-1
344
+ left_term = X_t @ Qt_T # bs, N, d_t-1
345
+ left_term = left_term.unsqueeze(dim=2) # bs, N, 1, d_t-1
346
+
347
+ right_term = Qsb.unsqueeze(1) # bs, 1, d0, d_t-1
348
+ numerator = left_term * right_term # bs, N, d0, d_t-1
349
+
350
+ X_t_transposed = X_t.transpose(-1, -2) # bs, dt, N
351
+
352
+ prod = Qtb @ X_t_transposed # bs, d0, N
353
+ prod = prod.transpose(-1, -2) # bs, N, d0
354
+ denominator = prod.unsqueeze(-1) # bs, N, d0, 1
355
+ denominator[denominator == 0] = 1e-6
356
+
357
+ out = numerator / denominator
358
+ return out
359
+
360
+
361
+ def mask_distributions(true_X, true_E, pred_X, pred_E, node_mask):
362
+ """
363
+ Set masked rows to arbitrary distributions, so it doesn't contribute to loss
364
+ :param true_X: bs, n, dx_out
365
+ :param true_E: bs, n, n, de_out
366
+ :param pred_X: bs, n, dx_out
367
+ :param pred_E: bs, n, n, de_out
368
+ :param node_mask: bs, n
369
+ :return: same sizes as input
370
+ """
371
+
372
+ row_X = torch.zeros(true_X.size(-1), dtype=torch.float, device=true_X.device)
373
+ row_X[0] = 1.
374
+ row_E = torch.zeros(true_E.size(-1), dtype=torch.float, device=true_E.device)
375
+ row_E[0] = 1.
376
+
377
+ diag_mask = ~torch.eye(node_mask.size(1), device=node_mask.device, dtype=torch.bool).unsqueeze(0)
378
+ true_X[~node_mask] = row_X
379
+ pred_X[~node_mask] = row_X
380
+ true_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E
381
+ pred_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E
382
+
383
+ true_X = true_X + 1e-7
384
+ pred_X = pred_X + 1e-7
385
+ true_E = true_E + 1e-7
386
+ pred_E = pred_E + 1e-7
387
+
388
+ true_X = true_X / torch.sum(true_X, dim=-1, keepdim=True)
389
+ pred_X = pred_X / torch.sum(pred_X, dim=-1, keepdim=True)
390
+ true_E = true_E / torch.sum(true_E, dim=-1, keepdim=True)
391
+ pred_E = pred_E / torch.sum(pred_E, dim=-1, keepdim=True)
392
+
393
+ return true_X, true_E, pred_X, pred_E
394
+
395
+
396
+ def posterior_distributions(X, E, y, X_t, E_t, y_t, Qt, Qsb, Qtb):
397
+ prob_X = compute_posterior_distribution(M=X, M_t=X_t, Qt_M=Qt.X, Qsb_M=Qsb.X, Qtb_M=Qtb.X) # (bs, n, dx)
398
+ prob_E = compute_posterior_distribution(M=E, M_t=E_t, Qt_M=Qt.E, Qsb_M=Qsb.E, Qtb_M=Qtb.E) # (bs, n * n, de)
399
+
400
+ return PlaceHolder(X=prob_X, E=prob_E, y=y_t)
401
+
402
+
403
+ def sample_discrete_feature_noise(limit_dist, node_mask, transition):
404
+ """ Sample from the limit distribution of the diffusion process"""
405
+ bs, n_max = node_mask.shape
406
+
407
+ x_limit = limit_dist.X[None, None, :].expand(bs, n_max, -1)
408
+ e_limit = limit_dist.E[None, None, None, :].expand(bs, n_max, n_max, -1)
409
+ y_limit = limit_dist.y[None, :].expand(bs, -1)
410
+
411
+ U_X = x_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max)
412
+ U_E = e_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max, n_max)
413
+ # print(U_E.shape, U_X.shape, y_limit.shape)
414
+ U_y = torch.empty((bs, 0))
415
+
416
+ long_mask = node_mask.long()
417
+ U_X = U_X.type_as(long_mask)
418
+ U_E = U_E.type_as(long_mask)
419
+ U_y = U_y.type_as(long_mask)
420
+
421
+ U_X = F.one_hot(U_X, num_classes=x_limit.shape[-1]).float()
422
+ U_E = F.one_hot(U_E, num_classes=e_limit.shape[-1]).float()
423
+
424
+ # Get upper triangular part of edge noise, without main diagonal
425
+ upper_triangular_mask = torch.zeros_like(U_E)
426
+ indices = torch.triu_indices(row=U_E.size(1), col=U_E.size(2), offset=1)
427
+ upper_triangular_mask[:, indices[0], indices[1], :] = 1
428
+
429
+ U_E = U_E * upper_triangular_mask
430
+ U_E = (U_E + torch.transpose(U_E, 1, 2))
431
+
432
+ assert (U_E == torch.transpose(U_E, 1, 2)).all()
433
+
434
+ # print(U_X.shape, limit_dist.cond.shape)
435
+ return PlaceHolder(X=U_X, E=U_E, y=U_y).mask(node_mask)
436
+
437
+
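sample_discrete_features and sample_discrete_feature_noise above both enforce symmetric (undirected) edges by sampling only the strict upper triangle and mirroring it. A minimal standalone sketch of that trick:

import torch

n = 4
E = torch.randint(0, 2, (1, n, n))      # independent draws for every ordered pair
E = torch.triu(E, diagonal=1)           # keep the strict upper triangle only
E = E + E.transpose(1, 2)               # mirror it onto the lower triangle
assert (E == E.transpose(1, 2)).all()   # symmetric adjacency with a zero diagonal
print(E)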
diffusion/distributions.py ADDED
@@ -0,0 +1,32 @@
1
+ import torch
2
+
3
+
4
+ class DistributionNodes:
5
+ def __init__(self, histogram):
6
+ """ Compute the distribution of the number of nodes in the dataset, and sample from this distribution.
7
+ histogram: dict. The keys are num_nodes, the values are counts
8
+ """
9
+
10
+ if type(histogram) == dict:
11
+ max_n_nodes = max(histogram.keys())
12
+ prob = torch.zeros(max_n_nodes + 1)
13
+ for num_nodes, count in histogram.items():
14
+ prob[num_nodes] = count
15
+ else:
16
+ prob = histogram
17
+
18
+ self.prob = prob / prob.sum()
19
+ self.m = torch.distributions.Categorical(prob)
20
+
21
+ def sample_n(self, n_samples, device):
22
+ idx = self.m.sample((n_samples,))
23
+ return idx.to(device)
24
+
25
+ def log_prob(self, batch_n_nodes):
26
+ assert len(batch_n_nodes.size()) == 1
27
+ p = self.prob.to(batch_n_nodes.device)
28
+
29
+ probas = p[batch_n_nodes]
30
+ log_p = torch.log(probas + 1e-30)
31
+ return log_p
32
+
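A minimal usage sketch of the class above with a hypothetical node-count histogram (assuming the upload root is on PYTHONPATH so that diffusion.distributions is importable):

import torch
from diffusion.distributions import DistributionNodes

histogram = {10: 30, 12: 20, 15: 10}             # hypothetical counts of graph sizes
dist = DistributionNodes(histogram)

n_nodes = dist.sample_n(5, device='cpu')         # e.g. tensor([10, 12, 10, 15, 12])
print(n_nodes, dist.log_prob(n_nodes))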
diffusion/extra_features.py ADDED
@@ -0,0 +1,275 @@
1
+ import torch
2
+ from diffusion import utils  # the upload has no src package; diffusion/utils.py provides PlaceHolder
3
+
4
+
5
+ class DummyExtraFeatures:
6
+ def __init__(self):
7
+ """ This class does not compute anything, just returns empty tensors."""
8
+
9
+ def __call__(self, noisy_data):
10
+ X = noisy_data['X_t']
11
+ E = noisy_data['E_t']
12
+ y = noisy_data['y_t']
13
+ empty_x = X.new_zeros((*X.shape[:-1], 0))
14
+ empty_e = E.new_zeros((*E.shape[:-1], 0))
15
+ empty_y = y.new_zeros((y.shape[0], 0))
16
+ return utils.PlaceHolder(X=empty_x, E=empty_e, y=empty_y)
17
+
18
+
19
+ class ExtraFeatures:
20
+ def __init__(self, extra_features_type, dataset_info):
21
+ self.max_n_nodes = dataset_info.max_n_nodes
22
+ self.ncycles = NodeCycleFeatures()
23
+ self.features_type = extra_features_type
24
+ if extra_features_type in ['eigenvalues', 'all']:
25
+ self.eigenfeatures = EigenFeatures(mode=extra_features_type)
26
+
27
+ def __call__(self, noisy_data):
28
+ n = noisy_data['node_mask'].sum(dim=1).unsqueeze(1) / self.max_n_nodes
29
+ x_cycles, y_cycles = self.ncycles(noisy_data) # (bs, n_cycles)
30
+
31
+ if self.features_type == 'cycles':
32
+ E = noisy_data['E_t']
33
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
34
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles)))
35
+
36
+ elif self.features_type == 'eigenvalues':
37
+ eigenfeatures = self.eigenfeatures(noisy_data)
38
+ E = noisy_data['E_t']
39
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
40
+ n_components, batched_eigenvalues = eigenfeatures # (bs, 1), (bs, 10)
41
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles, n_components,
42
+ batched_eigenvalues)))
43
+ elif self.features_type == 'all':
44
+ eigenfeatures = self.eigenfeatures(noisy_data)
45
+ E = noisy_data['E_t']
46
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
47
+ n_components, batched_eigenvalues, nonlcc_indicator, k_lowest_eigvec = eigenfeatures # (bs, 1), (bs, 10),
48
+ # (bs, n, 1), (bs, n, 2)
49
+
50
+ return utils.PlaceHolder(X=torch.cat((x_cycles, nonlcc_indicator, k_lowest_eigvec), dim=-1),
51
+ E=extra_edge_attr,
52
+ y=torch.hstack((n, y_cycles, n_components, batched_eigenvalues)))
53
+ else:
54
+ raise ValueError(f"Features type {self.features_type} not implemented")
55
+
56
+
57
+ class NodeCycleFeatures:
58
+ def __init__(self):
59
+ self.kcycles = KNodeCycles()
60
+
61
+ def __call__(self, noisy_data):
62
+ adj_matrix = noisy_data['E_t'][..., 1:].sum(dim=-1).float()
63
+
64
+ x_cycles, y_cycles = self.kcycles.k_cycles(adj_matrix=adj_matrix) # (bs, n_cycles)
65
+ x_cycles = x_cycles.type_as(adj_matrix) * noisy_data['node_mask'].unsqueeze(-1)
66
+ # Avoid large values when the graph is dense
67
+ x_cycles = x_cycles / 10
68
+ y_cycles = y_cycles / 10
69
+ x_cycles[x_cycles > 1] = 1
70
+ y_cycles[y_cycles > 1] = 1
71
+ return x_cycles, y_cycles
72
+
73
+
74
+ class EigenFeatures:
75
+ """
76
+ Code taken from : https://github.com/Saro00/DGN/blob/master/models/pytorch/eigen_agg.py
77
+ """
78
+ def __init__(self, mode):
79
+ """ mode: 'eigenvalues' or 'all' """
80
+ self.mode = mode
81
+
82
+ def __call__(self, noisy_data):
83
+ E_t = noisy_data['E_t']
84
+ mask = noisy_data['node_mask']
85
+ A = E_t[..., 1:].sum(dim=-1).float() * mask.unsqueeze(1) * mask.unsqueeze(2)
86
+ L = compute_laplacian(A, normalize=False)
87
+ mask_diag = 2 * L.shape[-1] * torch.eye(A.shape[-1]).type_as(L).unsqueeze(0)
88
+ mask_diag = mask_diag * (~mask.unsqueeze(1)) * (~mask.unsqueeze(2))
89
+ L = L * mask.unsqueeze(1) * mask.unsqueeze(2) + mask_diag
90
+
91
+ if self.mode == 'eigenvalues':
92
+ eigvals = torch.linalg.eigvalsh(L) # bs, n
93
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
94
+
95
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
96
+ return n_connected_comp.type_as(A), batch_eigenvalues.type_as(A)
97
+
98
+ elif self.mode == 'all':
99
+ eigvals, eigvectors = torch.linalg.eigh(L)
100
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
101
+ eigvectors = eigvectors * mask.unsqueeze(2) * mask.unsqueeze(1)
102
+ # Retrieve eigenvalues features
103
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
104
+
105
+ # Retrieve eigenvectors features
106
+ nonlcc_indicator, k_lowest_eigenvector = get_eigenvectors_features(vectors=eigvectors,
107
+ node_mask=noisy_data['node_mask'],
108
+ n_connected=n_connected_comp)
109
+ return n_connected_comp, batch_eigenvalues, nonlcc_indicator, k_lowest_eigenvector
110
+ else:
111
+ raise NotImplementedError(f"Mode {self.mode} is not implemented")
112
+
113
+
114
+ def compute_laplacian(adjacency, normalize: bool):
115
+ """
116
+ adjacency : batched adjacency matrix (bs, n, n)
117
+ normalize: can be None, 'sym' or 'rw' for the combinatorial, symmetric normalized or random walk Laplacians
118
+ Return:
119
+ L (n x n ndarray): combinatorial or symmetric normalized Laplacian.
120
+ """
121
+ diag = torch.sum(adjacency, dim=-1) # (bs, n)
122
+ n = diag.shape[-1]
123
+ D = torch.diag_embed(diag) # Degree matrix # (bs, n, n)
124
+ combinatorial = D - adjacency # (bs, n, n)
125
+
126
+ if not normalize:
127
+ return (combinatorial + combinatorial.transpose(1, 2)) / 2
128
+
129
+ diag0 = diag.clone()
130
+ diag[diag == 0] = 1e-12
131
+
132
+ diag_norm = 1 / torch.sqrt(diag) # (bs, n)
133
+ D_norm = torch.diag_embed(diag_norm) # (bs, n, n)
134
+ L = torch.eye(n).unsqueeze(0) - D_norm @ adjacency @ D_norm
135
+ L[diag0 == 0] = 0
136
+ return (L + L.transpose(1, 2)) / 2
137
+
138
+
139
+ def get_eigenvalues_features(eigenvalues, k=5):
140
+ """
141
+ values : eigenvalues -- (bs, n)
142
+ node_mask: (bs, n)
143
+ k: num of non zero eigenvalues to keep
144
+ """
145
+ ev = eigenvalues
146
+ bs, n = ev.shape
147
+ n_connected_components = (ev < 1e-5).sum(dim=-1)
148
+ assert (n_connected_components > 0).all(), (n_connected_components, ev)
149
+
150
+ to_extend = max(n_connected_components) + k - n
151
+ if to_extend > 0:
152
+ eigenvalues = torch.hstack((eigenvalues, 2 * torch.ones(bs, to_extend).type_as(eigenvalues)))
153
+ indices = torch.arange(k).type_as(eigenvalues).long().unsqueeze(0) + n_connected_components.unsqueeze(1)
154
+ first_k_ev = torch.gather(eigenvalues, dim=1, index=indices)
155
+ return n_connected_components.unsqueeze(-1), first_k_ev
156
+
157
+
158
+ def get_eigenvectors_features(vectors, node_mask, n_connected, k=2):
159
+ """
160
+ vectors (bs, n, n) : eigenvectors of Laplacian IN COLUMNS
161
+ returns:
162
+ not_lcc_indicator : indicator vectors of largest connected component (lcc) for each graph -- (bs, n, 1)
163
+ k_lowest_eigvec : k first eigenvectors for the largest connected component -- (bs, n, k)
164
+ """
165
+ bs, n = vectors.size(0), vectors.size(1)
166
+
167
+ # Create an indicator for the nodes outside the largest connected components
168
+ first_ev = torch.round(vectors[:, :, 0], decimals=3) * node_mask # bs, n
169
+ # Add random value to the mask to prevent 0 from becoming the mode
170
+ random = torch.randn(bs, n, device=node_mask.device) * (~node_mask) # bs, n
171
+ first_ev = first_ev + random
172
+ most_common = torch.mode(first_ev, dim=1).values # values: bs -- indices: bs
173
+ mask = ~ (first_ev == most_common.unsqueeze(1))
174
+ not_lcc_indicator = (mask * node_mask).unsqueeze(-1).float()
175
+
176
+ # Get the eigenvectors corresponding to the first nonzero eigenvalues
177
+ to_extend = max(n_connected) + k - n
178
+ if to_extend > 0:
179
+ vectors = torch.cat((vectors, torch.zeros(bs, n, to_extend).type_as(vectors)), dim=2) # bs, n , n + to_extend
180
+ indices = torch.arange(k).type_as(vectors).long().unsqueeze(0).unsqueeze(0) + n_connected.unsqueeze(2) # bs, 1, k
181
+ indices = indices.expand(-1, n, -1) # bs, n, k
182
+ first_k_ev = torch.gather(vectors, dim=2, index=indices) # bs, n, k
183
+ first_k_ev = first_k_ev * node_mask.unsqueeze(2)
184
+
185
+ return not_lcc_indicator, first_k_ev
186
+
187
+ def batch_trace(X):
188
+ """
189
+ Expect a matrix of shape B N N, returns the trace in shape B
190
+ :param X:
191
+ :return:
192
+ """
193
+ diag = torch.diagonal(X, dim1=-2, dim2=-1)
194
+ trace = diag.sum(dim=-1)
195
+ return trace
196
+
197
+
198
+ def batch_diagonal(X):
199
+ """
200
+ Extracts the diagonal from the last two dims of a tensor
201
+ :param X:
202
+ :return:
203
+ """
204
+ return torch.diagonal(X, dim1=-2, dim2=-1)
205
+
206
+
207
+ class KNodeCycles:
208
+ """ Builds cycle counts for each node in a graph.
209
+ """
210
+
211
+ def __init__(self):
212
+ super().__init__()
213
+
214
+ def calculate_kpowers(self):
215
+ self.k1_matrix = self.adj_matrix.float()
216
+ self.d = self.adj_matrix.sum(dim=-1)
217
+ self.k2_matrix = self.k1_matrix @ self.adj_matrix.float()
218
+ self.k3_matrix = self.k2_matrix @ self.adj_matrix.float()
219
+ self.k4_matrix = self.k3_matrix @ self.adj_matrix.float()
220
+ self.k5_matrix = self.k4_matrix @ self.adj_matrix.float()
221
+ self.k6_matrix = self.k5_matrix @ self.adj_matrix.float()
222
+
223
+ def k3_cycle(self):
224
+ """ tr(A ** 3). """
225
+ c3 = batch_diagonal(self.k3_matrix)
226
+ return (c3 / 2).unsqueeze(-1).float(), (torch.sum(c3, dim=-1) / 6).unsqueeze(-1).float()
227
+
228
+ def k4_cycle(self):
229
+ diag_a4 = batch_diagonal(self.k4_matrix)
230
+ c4 = diag_a4 - self.d * (self.d - 1) - (self.adj_matrix @ self.d.unsqueeze(-1)).sum(dim=-1)
231
+ return (c4 / 2).unsqueeze(-1).float(), (torch.sum(c4, dim=-1) / 8).unsqueeze(-1).float()
232
+
233
+ def k5_cycle(self):
234
+ diag_a5 = batch_diagonal(self.k5_matrix)
235
+ triangles = batch_diagonal(self.k3_matrix)
236
+ c5 = diag_a5 - 2 * triangles * self.d - (self.adj_matrix @ triangles.unsqueeze(-1)).sum(dim=-1) + triangles
237
+ return (c5 / 2).unsqueeze(-1).float(), (c5.sum(dim=-1) / 10).unsqueeze(-1).float()
238
+
239
+ def k6_cycle(self):
240
+ term_1_t = batch_trace(self.k6_matrix)
241
+ term_2_t = batch_trace(self.k3_matrix ** 2)
242
+ term3_t = torch.sum(self.adj_matrix * self.k2_matrix.pow(2), dim=[-2, -1])
243
+ d_t4 = batch_diagonal(self.k2_matrix)
244
+ a_4_t = batch_diagonal(self.k4_matrix)
245
+ term_4_t = (d_t4 * a_4_t).sum(dim=-1)
246
+ term_5_t = batch_trace(self.k4_matrix)
247
+ term_6_t = batch_trace(self.k3_matrix)
248
+ term_7_t = batch_diagonal(self.k2_matrix).pow(3).sum(-1)
249
+ term8_t = torch.sum(self.k3_matrix, dim=[-2, -1])
250
+ term9_t = batch_diagonal(self.k2_matrix).pow(2).sum(-1)
251
+ term10_t = batch_trace(self.k2_matrix)
252
+
253
+ c6_t = (term_1_t - 3 * term_2_t + 9 * term3_t - 6 * term_4_t + 6 * term_5_t - 4 * term_6_t + 4 * term_7_t +
254
+ 3 * term8_t - 12 * term9_t + 4 * term10_t)
255
+ return None, (c6_t / 12).unsqueeze(-1).float()
256
+
257
+ def k_cycles(self, adj_matrix, verbose=False):
258
+ self.adj_matrix = adj_matrix
259
+ self.calculate_kpowers()
260
+
261
+ k3x, k3y = self.k3_cycle()
262
+ assert (k3x >= -0.1).all()
263
+
264
+ k4x, k4y = self.k4_cycle()
265
+ assert (k4x >= -0.1).all()
266
+
267
+ k5x, k5y = self.k5_cycle()
268
+ assert (k5x >= -0.1).all(), k5x
269
+
270
+ _, k6y = self.k6_cycle()
271
+ assert (k6y >= -0.1).all()
272
+
273
+ kcyclesx = torch.cat([k3x, k4x, k5x], dim=-1)
274
+ kcyclesy = torch.cat([k3y, k4y, k5y, k6y], dim=-1)
275
+ return kcyclesx, kcyclesy
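A small sanity check for the cycle counter (not part of the commit). It imports KNodeCycles from the identical copy in the top-level extra_features.py, which avoids this module's `from src import utils` dependency.

import torch
from extra_features import KNodeCycles

# A single triangle: each node lies on one 3-cycle and the graph has exactly one.
adj = torch.tensor([[[0., 1., 1.],
                     [1., 0., 1.],
                     [1., 1., 0.]]])
x_cycles, y_cycles = KNodeCycles().k_cycles(adj)
print(x_cycles[0, :, 0])   # per-node 3-cycle counts: tensor([1., 1., 1.])
print(y_cycles[0])         # graph-level 3/4/5/6-cycle counts: tensor([1., 0., 0., 0.])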
diffusion/extra_features_molecular.py ADDED
@@ -0,0 +1,57 @@
+ import torch
+ from src import utils
+
+
+ class ExtraMolecularFeatures:
+     def __init__(self, dataset_infos):
+         self.charge = ChargeFeature(remove_h=dataset_infos.remove_h, valencies=dataset_infos.valencies)
+         self.valency = ValencyFeature()
+         self.weight = WeightFeature(max_weight=dataset_infos.max_weight, atom_weights=dataset_infos.atom_weights)
+
+     def __call__(self, noisy_data):
+         charge = self.charge(noisy_data).unsqueeze(-1)      # (bs, n, 1)
+         valency = self.valency(noisy_data).unsqueeze(-1)    # (bs, n, 1)
+         weight = self.weight(noisy_data)                    # (bs, 1)
+
+         extra_edge_attr = torch.zeros((*noisy_data['E_t'].shape[:-1], 0)).type_as(noisy_data['E_t'])
+
+         return utils.PlaceHolder(X=torch.cat((charge, valency), dim=-1), E=extra_edge_attr, y=weight)
+
+
+ class ChargeFeature:
+     def __init__(self, remove_h, valencies):
+         self.remove_h = remove_h
+         self.valencies = valencies
+
+     def __call__(self, noisy_data):
+         bond_orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1)
+         weighted_E = noisy_data['E_t'] * bond_orders                 # (bs, n, n, de)
+         current_valencies = weighted_E.argmax(dim=-1).sum(dim=-1)    # (bs, n)
+
+         valencies = torch.tensor(self.valencies, device=noisy_data['X_t'].device).reshape(1, 1, -1)
+         X = noisy_data['X_t'] * valencies                            # (bs, n, dx)
+         normal_valencies = torch.argmax(X, dim=-1)                   # (bs, n)
+
+         return (normal_valencies - current_valencies).type_as(noisy_data['X_t'])
+
+
+ class ValencyFeature:
+     def __init__(self):
+         pass
+
+     def __call__(self, noisy_data):
+         orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1)
+         E = noisy_data['E_t'] * orders                   # (bs, n, n, de)
+         valencies = E.argmax(dim=-1).sum(dim=-1)         # (bs, n)
+         return valencies.type_as(noisy_data['X_t'])
+
+
+ class WeightFeature:
+     def __init__(self, max_weight, atom_weights):
+         self.max_weight = max_weight
+         self.atom_weight_list = torch.tensor(list(atom_weights.values()))
+
+     def __call__(self, noisy_data):
+         X = torch.argmax(noisy_data['X_t'], dim=-1)      # (bs, n)
+         X_weights = self.atom_weight_list.to(X.device)[X]            # (bs, n)
+         return X_weights.sum(dim=-1).unsqueeze(-1).type_as(noisy_data['X_t']) / self.max_weight     # (bs, 1)
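A usage sketch (not part of the commit). The dataset constants below are invented, and the module's `from src import utils` import is assumed to resolve on the Python path.

import torch
from diffusion.extra_features_molecular import ExtraMolecularFeatures

class MolInfo:                         # hypothetical stand-in for the real dataset_infos object
    remove_h = True
    valencies = [4, 3, 2, 1]           # e.g. C, N, O, F
    atom_weights = {0: 12, 1: 14, 2: 16, 3: 19}
    max_weight = 150

bs, n = 2, 5
X_t = torch.nn.functional.one_hot(torch.zeros(bs, n, dtype=torch.long), 4).float()     # all carbons
E_t = torch.nn.functional.one_hot(torch.zeros(bs, n, n, dtype=torch.long), 5).float()  # no bonds
noisy_data = {'X_t': X_t, 'E_t': E_t}

extra = ExtraMolecularFeatures(MolInfo())(noisy_data)   # PlaceHolder with X: (bs, n, 2), y: (bs, 1)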
diffusion/layers.py ADDED
@@ -0,0 +1,19 @@
+ import math
+ import torch
+
+
+ class SinusoidalPosEmb(torch.nn.Module):
+     def __init__(self, dim):
+         super().__init__()
+         self.dim = dim
+
+     def forward(self, x):
+         x = x.squeeze() * 1000
+         assert len(x.shape) == 1
+         half_dim = self.dim // 2
+         emb = math.log(10000) / (half_dim - 1)
+         emb = torch.exp(torch.arange(half_dim) * -emb)
+         emb = emb.type_as(x)
+         emb = x[:, None] * emb[None, :]
+         emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
+         return emb
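A minimal sketch of the timestep embedding above (not part of the commit):

import torch
from diffusion.layers import SinusoidalPosEmb

emb = SinusoidalPosEmb(dim=128)
t = torch.rand(16, 1)          # normalized timesteps in [0, 1], one per graph
t_emb = emb(t)                 # (16, 128): sinusoidal embedding of t * 1000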
diffusion/noise_schedule.py ADDED
@@ -0,0 +1,225 @@
1
+ import numpy as np
2
+ import torch
3
+ import utils
4
+ from diffusion import diffusion_utils
5
+
6
+
7
+ class PredefinedNoiseSchedule(torch.nn.Module):
8
+ """
9
+ Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules.
10
+ """
11
+
12
+ def __init__(self, noise_schedule, timesteps):
13
+ super(PredefinedNoiseSchedule, self).__init__()
14
+ self.timesteps = timesteps
15
+
16
+ if noise_schedule == 'cosine':
17
+ alphas2 = diffusion_utils.cosine_beta_schedule(timesteps)
18
+ elif noise_schedule == 'custom':
19
+ raise NotImplementedError()
20
+ else:
21
+ raise ValueError(noise_schedule)
22
+
23
+ # print('alphas2', alphas2)
24
+
25
+ sigmas2 = 1 - alphas2
26
+
27
+ log_alphas2 = np.log(alphas2)
28
+ log_sigmas2 = np.log(sigmas2)
29
+
30
+ log_alphas2_to_sigmas2 = log_alphas2 - log_sigmas2 # (timesteps + 1, )
31
+
32
+ # print('gamma', -log_alphas2_to_sigmas2)
33
+
34
+ self.gamma = torch.nn.Parameter(
35
+ torch.from_numpy(-log_alphas2_to_sigmas2).float(),
36
+ requires_grad=False)
37
+
38
+ def forward(self, t):
39
+ t_int = torch.round(t * self.timesteps).long()
40
+ return self.gamma[t_int]
41
+
42
+
43
+
44
+ class PredefinedNoiseScheduleDiscrete(torch.nn.Module):
45
+ """
46
+ Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules.
47
+ """
48
+
49
+ def __init__(self, noise_schedule, timesteps):
50
+ super(PredefinedNoiseScheduleDiscrete, self).__init__()
51
+ self.timesteps = timesteps
52
+
53
+ if noise_schedule == 'cosine':
54
+ betas = diffusion_utils.cosine_beta_schedule_discrete(timesteps)
55
+ elif noise_schedule == 'custom':
56
+ betas = diffusion_utils.custom_beta_schedule_discrete(timesteps)
57
+ else:
58
+ raise NotImplementedError(noise_schedule)
59
+
60
+ self.register_buffer('betas', torch.from_numpy(betas).float())
61
+
62
+ self.alphas = 1 - torch.clamp(self.betas, min=0, max=0.9999)
63
+
64
+ log_alpha = torch.log(self.alphas)
65
+ log_alpha_bar = torch.cumsum(log_alpha, dim=0)
66
+ self.alphas_bar = torch.exp(log_alpha_bar)
67
+ # print(f"[Noise schedule: {noise_schedule}] alpha_bar:", self.alphas_bar)
68
+
69
+ def forward(self, t_normalized=None, t_int=None):
70
+ assert int(t_normalized is None) + int(t_int is None) == 1
71
+ if t_int is None:
72
+ t_int = torch.round(t_normalized * self.timesteps)
73
+
74
+ return self.betas[t_int.long()]
75
+
76
+ def get_alpha_bar(self, t_normalized=None, t_int=None):
77
+ assert int(t_normalized is None) + int(t_int is None) == 1
78
+ if t_int is None:
79
+ t_int = torch.round(t_normalized * self.timesteps)
80
+ return self.alphas_bar.to(t_int.device)[t_int.long()]
81
+
82
+
83
+ class DiscreteUniformTransition:
84
+ def __init__(self, x_classes: int, e_classes: int, y_classes: int):
85
+ self.X_classes = x_classes
86
+ self.E_classes = e_classes
87
+ self.y_classes = y_classes
88
+ self.u_x = torch.ones(1, self.X_classes, self.X_classes)
89
+ if self.X_classes > 0:
90
+ self.u_x = self.u_x / self.X_classes
91
+
92
+ self.u_e = torch.ones(1, self.E_classes, self.E_classes)
93
+ if self.E_classes > 0:
94
+ self.u_e = self.u_e / self.E_classes
95
+
96
+ self.u_y = torch.ones(1, self.y_classes, self.y_classes)
97
+ if self.y_classes > 0:
98
+ self.u_y = self.u_y / self.y_classes
99
+
100
+ def get_Qt(self, beta_t, device):
101
+ """ Returns one-step transition matrices for X and E, from step t - 1 to step t.
102
+ Qt = (1 - beta_t) * I + beta_t / K
103
+
104
+ beta_t: (bs) noise level between 0 and 1
105
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
106
+ """
107
+ beta_t = beta_t.unsqueeze(1)
108
+ beta_t = beta_t.to(device)
109
+ self.u_x = self.u_x.to(device)
110
+ self.u_e = self.u_e.to(device)
111
+ self.u_y = self.u_y.to(device)
112
+
113
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0)
114
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0)
115
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0)
116
+
117
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
118
+
119
+ def get_Qt_bar(self, alpha_bar_t, device):
120
+ """ Returns t-step transition matrices for X and E, from step 0 to step t.
121
+ Qt = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) / K
122
+
123
+ alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t.
124
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
125
+ """
126
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
127
+
128
+ alpha_bar_t = alpha_bar_t.to(device)
129
+ self.u_x = self.u_x.to(device)
130
+ self.u_e = self.u_e.to(device)
131
+ self.u_y = self.u_y.to(device)
132
+
133
+ q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
134
+ q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
135
+ q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
136
+
137
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
138
+
139
+
140
+ class MarginalUniformTransition:
141
+ def __init__(self, x_marginals, e_marginals, y_classes):
142
+ self.X_classes = len(x_marginals)
143
+ self.E_classes = len(e_marginals)
144
+ self.y_classes = y_classes
145
+ self.x_marginals = x_marginals
146
+ self.e_marginals = e_marginals
147
+
148
+ self.u_x = x_marginals.unsqueeze(0).expand(self.X_classes, -1).unsqueeze(0)
149
+ self.u_e = e_marginals.unsqueeze(0).expand(self.E_classes, -1).unsqueeze(0)
150
+ self.u_y = torch.ones(1, self.y_classes, self.y_classes)
151
+ if self.y_classes > 0:
152
+ self.u_y = self.u_y / self.y_classes
153
+
154
+ def get_Qt(self, beta_t, device):
155
+ """ Returns one-step transition matrices for X and E, from step t - 1 to step t.
156
+ Qt = (1 - beta_t) * I + beta_t * M, where M has the marginal distribution in every row
157
+
158
+ beta_t: (bs) noise level between 0 and 1
159
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). """
160
+ beta_t = beta_t.unsqueeze(1)
161
+ beta_t = beta_t.to(device)
162
+ self.u_x = self.u_x.to(device)
163
+ self.u_e = self.u_e.to(device)
164
+ self.u_y = self.u_y.to(device)
165
+
166
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0)
167
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0)
168
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0)
169
+
170
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
171
+
172
+ def get_Qt_bar(self, alpha_bar_t, device):
173
+ """ Returns t-step transition matrices for X and E, from step 0 to step t.
174
+ Qt_bar = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) * M, where M has the marginal distribution in every row
175
+
176
+ alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t.
177
+ returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy).
178
+ """
179
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
180
+ alpha_bar_t = alpha_bar_t.to(device)
181
+ self.u_x = self.u_x.to(device)
182
+ self.u_e = self.u_e.to(device)
183
+ self.u_y = self.u_y.to(device)
184
+
185
+ q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
186
+ q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
187
+ q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
188
+
189
+ return utils.PlaceHolder(X=q_x, E=q_e, y=q_y)
190
+
191
+
192
+ class AbsorbingStateTransition:
193
+ def __init__(self, abs_state: int, x_classes: int, e_classes: int, y_classes: int):
194
+ self.X_classes = x_classes
195
+ self.E_classes = e_classes
196
+ self.y_classes = y_classes
197
+
198
+ self.u_x = torch.zeros(1, self.X_classes, self.X_classes)
199
+ self.u_x[:, :, abs_state] = 1
200
+
201
+ self.u_e = torch.zeros(1, self.E_classes, self.E_classes)
202
+ self.u_e[:, :, abs_state] = 1
203
+
204
+ self.u_y = torch.zeros(1, self.y_classes, self.y_classes)
205
+ self.u_y[:, :, abs_state] = 1
206
+
207
+ def get_Qt(self, beta_t):
208
+ """ Returns one-step transition matrices for X, E and y. """
209
+ beta_t = beta_t.unsqueeze(1)
210
+ q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes).unsqueeze(0)
211
+ q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes).unsqueeze(0)
212
+ q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes).unsqueeze(0)
213
+ return q_x, q_e, q_y
214
+
215
+ def get_Qt_bar(self, alpha_bar_t):
216
+ """ beta_t: (bs)
217
+ Returns transition matrices for X and E"""
218
+
219
+ alpha_bar_t = alpha_bar_t.unsqueeze(1)
220
+
221
+ q_x = alpha_bar_t * torch.eye(self.X_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x
222
+ q_e = alpha_bar_t * torch.eye(self.E_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e
223
+ q_y = alpha_bar_t * torch.eye(self.y_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y
224
+
225
+ return q_x, q_e, q_y
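A usage sketch for the discrete schedule and the marginal transition model (not part of the commit); the marginal vectors are made up, and the script is assumed to run from the repository root so that `utils` and `diffusion.diffusion_utils` import correctly.

import torch
from diffusion.noise_schedule import PredefinedNoiseScheduleDiscrete, MarginalUniformTransition

schedule = PredefinedNoiseScheduleDiscrete('cosine', timesteps=500)
t_int = torch.randint(0, 500, (8, 1))               # one timestep per graph in the batch
alpha_bar = schedule.get_alpha_bar(t_int=t_int)     # (8, 1) cumulative signal level

x_marginals = torch.tensor([0.7, 0.2, 0.1])         # made-up node-class marginals
e_marginals = torch.tensor([0.9, 0.05, 0.03, 0.02]) # made-up edge-class marginals
transition = MarginalUniformTransition(x_marginals, e_marginals, y_classes=1)
Qt_bar = transition.get_Qt_bar(alpha_bar, device='cpu')   # PlaceHolder: Qt_bar.X is (8, 3, 3)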
diffusion/utils.py ADDED
@@ -0,0 +1,137 @@
1
+ import os
2
+ import torch_geometric.utils
3
+ from omegaconf import OmegaConf, open_dict
4
+ from torch_geometric.utils import to_dense_adj, to_dense_batch
5
+ import torch
6
+ import omegaconf
7
+ import wandb
8
+
9
+ def create_folders(args):
10
+ try:
11
+ # os.makedirs('checkpoints')
12
+ os.makedirs('graphs')
13
+ os.makedirs('chains')
14
+ except OSError:
15
+ pass
16
+
17
+ try:
18
+ # os.makedirs('checkpoints/' + args.general.name)
19
+ os.makedirs('graphs/' + args.general.name)
20
+ os.makedirs('chains/' + args.general.name)
21
+ except OSError:
22
+ pass
23
+
24
+
25
+ def normalize(X, E, y, norm_values, norm_biases, node_mask):
26
+ X = (X - norm_biases[0]) / norm_values[0]
27
+ E = (E - norm_biases[1]) / norm_values[1]
28
+ y = (y - norm_biases[2]) / norm_values[2]
29
+
30
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
31
+ E[diag] = 0
32
+
33
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask)
34
+
35
+
36
+ def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False):
37
+ """
38
+ X : node features
39
+ E : edge features
40
+ y : global features
41
+ norm_values : [norm value X, norm value E, norm value y]
42
+ norm_biases : same order
43
+ node_mask
44
+ """
45
+ X = (X * norm_values[0] + norm_biases[0])
46
+ E = (E * norm_values[1] + norm_biases[1])
47
+ y = y * norm_values[2] + norm_biases[2]
48
+
49
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse)
50
+
51
+
52
+ def to_dense(x, edge_index, edge_attr, batch):
53
+ X, node_mask = to_dense_batch(x=x, batch=batch)
54
+ # node_mask = node_mask.float()
55
+ edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr)
56
+ # TODO: carefully check if setting node_mask as a bool breaks the continuous case
57
+ max_num_nodes = X.size(1)
58
+ E = to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes)
59
+ E = encode_no_edge(E)
60
+
61
+ return PlaceHolder(X=X, E=E, y=None), node_mask
62
+
63
+
64
+ def encode_no_edge(E):
65
+ assert len(E.shape) == 4
66
+ if E.shape[-1] == 0:
67
+ return E
68
+ no_edge = torch.sum(E, dim=3) == 0
69
+ first_elt = E[:, :, :, 0]
70
+ first_elt[no_edge] = 1
71
+ E[:, :, :, 0] = first_elt
72
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
73
+ E[diag] = 0
74
+ return E
75
+
76
+
77
+ def update_config_with_new_keys(cfg, saved_cfg):
78
+ saved_general = saved_cfg.general
79
+ saved_train = saved_cfg.train
80
+ saved_model = saved_cfg.model
81
+
82
+ for key, val in saved_general.items():
83
+ OmegaConf.set_struct(cfg.general, True)
84
+ with open_dict(cfg.general):
85
+ if key not in cfg.general.keys():
86
+ setattr(cfg.general, key, val)
87
+
88
+ OmegaConf.set_struct(cfg.train, True)
89
+ with open_dict(cfg.train):
90
+ for key, val in saved_train.items():
91
+ if key not in cfg.train.keys():
92
+ setattr(cfg.train, key, val)
93
+
94
+ OmegaConf.set_struct(cfg.model, True)
95
+ with open_dict(cfg.model):
96
+ for key, val in saved_model.items():
97
+ if key not in cfg.model.keys():
98
+ setattr(cfg.model, key, val)
99
+ return cfg
100
+
101
+
102
+ class PlaceHolder:
103
+ def __init__(self, X, E, y):
104
+ self.X = X
105
+ self.E = E
106
+ self.y = y
107
+
108
+ def type_as(self, x: torch.Tensor):
109
+ """ Changes the device and dtype of X, E, y. """
110
+ self.X = self.X.type_as(x)
111
+ self.E = self.E.type_as(x)
112
+ self.y = self.y.type_as(x)
113
+ return self
114
+
115
+ def mask(self, node_mask, collapse=False):
116
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
117
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
118
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
119
+
120
+ if collapse:
121
+ self.X = torch.argmax(self.X, dim=-1)
122
+ self.E = torch.argmax(self.E, dim=-1)
123
+
124
+ self.X[node_mask == 0] = - 1
125
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
126
+ else:
127
+ self.X = self.X * x_mask
128
+ self.E = self.E * e_mask1 * e_mask2
129
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
130
+ return self
131
+
132
+ def setup_wandb(cfg):
133
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
134
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
135
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
136
+ wandb.init(**kwargs)
137
+ wandb.save('*.txt')
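A sketch of the sparse-to-dense conversion (not part of the commit); the two toy graphs and their one-hot features are invented for illustration.

import torch
from torch_geometric.data import Data, Batch
from diffusion.utils import to_dense

g1 = Data(x=torch.eye(3), edge_index=torch.tensor([[0, 1], [1, 0]]),
          edge_attr=torch.tensor([[0., 1.], [0., 1.]]))        # 3 nodes, one undirected edge
g2 = Data(x=torch.eye(3)[:2], edge_index=torch.tensor([[0, 1], [1, 0]]),
          edge_attr=torch.tensor([[0., 1.], [0., 1.]]))        # 2 nodes, one undirected edge
batch = Batch.from_data_list([g1, g2])

dense, node_mask = to_dense(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
print(dense.X.shape, dense.E.shape, node_mask.shape)   # (2, 3, 3), (2, 3, 3, 2), (2, 3)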
distributions.py ADDED
@@ -0,0 +1,37 @@
+ import torch
+
+
+ class DistributionNodes:
+     def __init__(self, histogram):
+         """ Compute the distribution of the number of nodes in the dataset, and sample from this distribution.
+             histogram: dict. The keys are num_nodes, the values are counts
+         """
+
+         if type(histogram) == dict:
+             max_n_nodes = max(histogram.keys())
+             prob = torch.zeros(max_n_nodes + 1)
+             for num_nodes, count in histogram.items():
+                 prob[num_nodes] = count
+         else:
+             prob = histogram
+
+         self.prob = prob / prob.sum()
+         self.m = torch.distributions.Categorical(prob)
+
+     def sample_n(self, n_samples, device):
+         idx = self.m.sample((n_samples,))
+         return idx.to(device)
+
+     def log_prob(self, batch_n_nodes):
+         assert len(batch_n_nodes.size()) == 1
+         p = self.prob.to(batch_n_nodes.device)
+
+         mask = batch_n_nodes >= p.shape[0]
+         batch_n_nodes[mask] = p.shape[0] - 1
+
+         probas = p[batch_n_nodes]
+
+         probas[mask] = 0
+         log_p = torch.log(probas + 1e-30)
+
+         return log_p
extra_features.py ADDED
@@ -0,0 +1,275 @@
1
+ import torch
2
+ import utils
3
+
4
+
5
+ class DummyExtraFeatures:
6
+ def __init__(self):
7
+ """ This class does not compute anything, just returns empty tensors."""
8
+
9
+ def __call__(self, noisy_data):
10
+ X = noisy_data['X_t']
11
+ E = noisy_data['E_t']
12
+ y = noisy_data['y_t']
13
+ empty_x = X.new_zeros((*X.shape[:-1], 0))
14
+ empty_e = E.new_zeros((*E.shape[:-1], 0))
15
+ empty_y = y.new_zeros((y.shape[0], 0))
16
+ return utils.PlaceHolder(X=empty_x, E=empty_e, y=empty_y)
17
+
18
+
19
+ class ExtraFeatures:
20
+ def __init__(self, extra_features_type, max_n_nodes):
21
+ self.max_n_nodes = max_n_nodes
22
+ self.ncycles = NodeCycleFeatures()
23
+ self.features_type = extra_features_type
24
+ if extra_features_type in ['eigenvalues', 'all']:
25
+ self.eigenfeatures = EigenFeatures(mode=extra_features_type)
26
+
27
+ def __call__(self, noisy_data):
28
+ n = noisy_data['node_mask'].sum(dim=1).unsqueeze(1) / self.max_n_nodes
29
+ x_cycles, y_cycles = self.ncycles(noisy_data) # (bs, n_cycles)
30
+
31
+ if self.features_type == 'cycles':
32
+ E = noisy_data['E_t']
33
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
34
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles)))
35
+
36
+ elif self.features_type == 'eigenvalues':
37
+ eigenfeatures = self.eigenfeatures(noisy_data)
38
+ E = noisy_data['E_t']
39
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
40
+ n_components, batched_eigenvalues = eigenfeatures # (bs, 1), (bs, 10)
41
+ return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles, n_components,
42
+ batched_eigenvalues)))
43
+ elif self.features_type == 'all':
44
+ eigenfeatures = self.eigenfeatures(noisy_data)
45
+ E = noisy_data['E_t']
46
+ extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E)
47
+ n_components, batched_eigenvalues, nonlcc_indicator, k_lowest_eigvec = eigenfeatures # (bs, 1), (bs, 10),
48
+ # (bs, n, 1), (bs, n, 2)
49
+
50
+ return utils.PlaceHolder(X=torch.cat((x_cycles, nonlcc_indicator, k_lowest_eigvec), dim=-1),
51
+ E=extra_edge_attr,
52
+ y=torch.hstack((n, y_cycles, n_components, batched_eigenvalues)))
53
+ else:
54
+ raise ValueError(f"Features type {self.features_type} not implemented")
55
+
56
+
57
+ class NodeCycleFeatures:
58
+ def __init__(self):
59
+ self.kcycles = KNodeCycles()
60
+
61
+ def __call__(self, noisy_data):
62
+ adj_matrix = noisy_data['E_t'][..., 1:].sum(dim=-1).float()
63
+
64
+ x_cycles, y_cycles = self.kcycles.k_cycles(adj_matrix=adj_matrix) # (bs, n_cycles)
65
+ x_cycles = x_cycles.type_as(adj_matrix) * noisy_data['node_mask'].unsqueeze(-1)
66
+ # Avoid large values when the graph is dense
67
+ x_cycles = x_cycles / 10
68
+ y_cycles = y_cycles / 10
69
+ x_cycles[x_cycles > 1] = 1
70
+ y_cycles[y_cycles > 1] = 1
71
+ return x_cycles, y_cycles
72
+
73
+
74
+ class EigenFeatures:
75
+ """
76
+ Code taken from : https://github.com/Saro00/DGN/blob/master/models/pytorch/eigen_agg.py
77
+ """
78
+ def __init__(self, mode):
79
+ """ mode: 'eigenvalues' or 'all' """
80
+ self.mode = mode
81
+
82
+ def __call__(self, noisy_data):
83
+ E_t = noisy_data['E_t']
84
+ mask = noisy_data['node_mask']
85
+ A = E_t[..., 1:].sum(dim=-1).float() * mask.unsqueeze(1) * mask.unsqueeze(2)
86
+ L = compute_laplacian(A, normalize=False)
87
+ mask_diag = 2 * L.shape[-1] * torch.eye(A.shape[-1]).type_as(L).unsqueeze(0)
88
+ mask_diag = mask_diag * (~mask.unsqueeze(1)) * (~mask.unsqueeze(2))
89
+ L = L * mask.unsqueeze(1) * mask.unsqueeze(2) + mask_diag
90
+
91
+ if self.mode == 'eigenvalues':
92
+ eigvals = torch.linalg.eigvalsh(L) # bs, n
93
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
94
+
95
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
96
+ return n_connected_comp.type_as(A), batch_eigenvalues.type_as(A)
97
+
98
+ elif self.mode == 'all':
99
+ eigvals, eigvectors = torch.linalg.eigh(L)
100
+ eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True)
101
+ eigvectors = eigvectors * mask.unsqueeze(2) * mask.unsqueeze(1)
102
+ # Retrieve eigenvalues features
103
+ n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals)
104
+
105
+ # Retrieve eigenvectors features
106
+ nonlcc_indicator, k_lowest_eigenvector = get_eigenvectors_features(vectors=eigvectors,
107
+ node_mask=noisy_data['node_mask'],
108
+ n_connected=n_connected_comp)
109
+ return n_connected_comp, batch_eigenvalues, nonlcc_indicator, k_lowest_eigenvector
110
+ else:
111
+ raise NotImplementedError(f"Mode {self.mode} is not implemented")
112
+
113
+
114
+ def compute_laplacian(adjacency, normalize: bool):
115
+ """
116
+ adjacency : batched adjacency matrix (bs, n, n)
117
+ normalize: can be None, 'sym' or 'rw' for the combinatorial, symmetric normalized or random walk Laplacians
118
+ Return:
119
+ L (n x n ndarray): combinatorial or symmetric normalized Laplacian.
120
+ """
121
+ diag = torch.sum(adjacency, dim=-1) # (bs, n)
122
+ n = diag.shape[-1]
123
+ D = torch.diag_embed(diag) # Degree matrix # (bs, n, n)
124
+ combinatorial = D - adjacency # (bs, n, n)
125
+
126
+ if not normalize:
127
+ return (combinatorial + combinatorial.transpose(1, 2)) / 2
128
+
129
+ diag0 = diag.clone()
130
+ diag[diag == 0] = 1e-12
131
+
132
+ diag_norm = 1 / torch.sqrt(diag) # (bs, n)
133
+ D_norm = torch.diag_embed(diag_norm) # (bs, n, n)
134
+ L = torch.eye(n).unsqueeze(0) - D_norm @ adjacency @ D_norm
135
+ L[diag0 == 0] = 0
136
+ return (L + L.transpose(1, 2)) / 2
137
+
138
+
139
+ def get_eigenvalues_features(eigenvalues, k=5):
140
+ """
141
+ values : eigenvalues -- (bs, n)
142
+ node_mask: (bs, n)
143
+ k: num of non zero eigenvalues to keep
144
+ """
145
+ ev = eigenvalues
146
+ bs, n = ev.shape
147
+ n_connected_components = (ev < 1e-5).sum(dim=-1)
148
+ # assert (n_connected_components > 0).all(), (n_connected_components, ev)
149
+
150
+ to_extend = max(n_connected_components) + k - n
151
+ if to_extend > 0:
152
+ eigenvalues = torch.hstack((eigenvalues, 2 * torch.ones(bs, to_extend).type_as(eigenvalues)))
153
+ indices = torch.arange(k).type_as(eigenvalues).long().unsqueeze(0) + n_connected_components.unsqueeze(1)
154
+ first_k_ev = torch.gather(eigenvalues, dim=1, index=indices)
155
+ return n_connected_components.unsqueeze(-1), first_k_ev
156
+
157
+
158
+ def get_eigenvectors_features(vectors, node_mask, n_connected, k=2):
159
+ """
160
+ vectors (bs, n, n) : eigenvectors of Laplacian IN COLUMNS
161
+ returns:
162
+ not_lcc_indicator : indicator vectors of largest connected component (lcc) for each graph -- (bs, n, 1)
163
+ k_lowest_eigvec : k first eigenvectors for the largest connected component -- (bs, n, k)
164
+ """
165
+ bs, n = vectors.size(0), vectors.size(1)
166
+
167
+ # Create an indicator for the nodes outside the largest connected components
168
+ first_ev = torch.round(vectors[:, :, 0], decimals=3) * node_mask # bs, n
169
+ # Add random value to the mask to prevent 0 from becoming the mode
170
+ random = torch.randn(bs, n, device=node_mask.device) * (~node_mask) # bs, n
171
+ first_ev = first_ev + random
172
+ most_common = torch.mode(first_ev, dim=1).values # values: bs -- indices: bs
173
+ mask = ~ (first_ev == most_common.unsqueeze(1))
174
+ not_lcc_indicator = (mask * node_mask).unsqueeze(-1).float()
175
+
176
+ # Get the eigenvectors corresponding to the first nonzero eigenvalues
177
+ to_extend = max(n_connected) + k - n
178
+ if to_extend > 0:
179
+ vectors = torch.cat((vectors, torch.zeros(bs, n, to_extend).type_as(vectors)), dim=2) # bs, n , n + to_extend
180
+ indices = torch.arange(k).type_as(vectors).long().unsqueeze(0).unsqueeze(0) + n_connected.unsqueeze(2) # bs, 1, k
181
+ indices = indices.expand(-1, n, -1) # bs, n, k
182
+ first_k_ev = torch.gather(vectors, dim=2, index=indices) # bs, n, k
183
+ first_k_ev = first_k_ev * node_mask.unsqueeze(2)
184
+
185
+ return not_lcc_indicator, first_k_ev
186
+
187
+ def batch_trace(X):
188
+ """
189
+ Expect a matrix of shape B N N, returns the trace in shape B
190
+ :param X:
191
+ :return:
192
+ """
193
+ diag = torch.diagonal(X, dim1=-2, dim2=-1)
194
+ trace = diag.sum(dim=-1)
195
+ return trace
196
+
197
+
198
+ def batch_diagonal(X):
199
+ """
200
+ Extracts the diagonal from the last two dims of a tensor
201
+ :param X:
202
+ :return:
203
+ """
204
+ return torch.diagonal(X, dim1=-2, dim2=-1)
205
+
206
+
207
+ class KNodeCycles:
208
+ """ Builds cycle counts for each node in a graph.
209
+ """
210
+
211
+ def __init__(self):
212
+ super().__init__()
213
+
214
+ def calculate_kpowers(self):
215
+ self.k1_matrix = self.adj_matrix.float()
216
+ self.d = self.adj_matrix.sum(dim=-1)
217
+ self.k2_matrix = self.k1_matrix @ self.adj_matrix.float()
218
+ self.k3_matrix = self.k2_matrix @ self.adj_matrix.float()
219
+ self.k4_matrix = self.k3_matrix @ self.adj_matrix.float()
220
+ self.k5_matrix = self.k4_matrix @ self.adj_matrix.float()
221
+ self.k6_matrix = self.k5_matrix @ self.adj_matrix.float()
222
+
223
+ def k3_cycle(self):
224
+ """ tr(A ** 3). """
225
+ c3 = batch_diagonal(self.k3_matrix)
226
+ return (c3 / 2).unsqueeze(-1).float(), (torch.sum(c3, dim=-1) / 6).unsqueeze(-1).float()
227
+
228
+ def k4_cycle(self):
229
+ diag_a4 = batch_diagonal(self.k4_matrix)
230
+ c4 = diag_a4 - self.d * (self.d - 1) - (self.adj_matrix @ self.d.unsqueeze(-1)).sum(dim=-1)
231
+ return (c4 / 2).unsqueeze(-1).float(), (torch.sum(c4, dim=-1) / 8).unsqueeze(-1).float()
232
+
233
+ def k5_cycle(self):
234
+ diag_a5 = batch_diagonal(self.k5_matrix)
235
+ triangles = batch_diagonal(self.k3_matrix)
236
+ c5 = diag_a5 - 2 * triangles * self.d - (self.adj_matrix @ triangles.unsqueeze(-1)).sum(dim=-1) + triangles
237
+ return (c5 / 2).unsqueeze(-1).float(), (c5.sum(dim=-1) / 10).unsqueeze(-1).float()
238
+
239
+ def k6_cycle(self):
240
+ term_1_t = batch_trace(self.k6_matrix)
241
+ term_2_t = batch_trace(self.k3_matrix ** 2)
242
+ term3_t = torch.sum(self.adj_matrix * self.k2_matrix.pow(2), dim=[-2, -1])
243
+ d_t4 = batch_diagonal(self.k2_matrix)
244
+ a_4_t = batch_diagonal(self.k4_matrix)
245
+ term_4_t = (d_t4 * a_4_t).sum(dim=-1)
246
+ term_5_t = batch_trace(self.k4_matrix)
247
+ term_6_t = batch_trace(self.k3_matrix)
248
+ term_7_t = batch_diagonal(self.k2_matrix).pow(3).sum(-1)
249
+ term8_t = torch.sum(self.k3_matrix, dim=[-2, -1])
250
+ term9_t = batch_diagonal(self.k2_matrix).pow(2).sum(-1)
251
+ term10_t = batch_trace(self.k2_matrix)
252
+
253
+ c6_t = (term_1_t - 3 * term_2_t + 9 * term3_t - 6 * term_4_t + 6 * term_5_t - 4 * term_6_t + 4 * term_7_t +
254
+ 3 * term8_t - 12 * term9_t + 4 * term10_t)
255
+ return None, (c6_t / 12).unsqueeze(-1).float()
256
+
257
+ def k_cycles(self, adj_matrix, verbose=False):
258
+ self.adj_matrix = adj_matrix
259
+ self.calculate_kpowers()
260
+
261
+ k3x, k3y = self.k3_cycle()
262
+ assert (k3x >= -0.1).all()
263
+
264
+ k4x, k4y = self.k4_cycle()
265
+ assert (k4x >= -0.1).all()
266
+
267
+ k5x, k5y = self.k5_cycle()
268
+ assert (k5x >= -0.1).all(), k5x
269
+
270
+ _, k6y = self.k6_cycle()
271
+ assert (k6y >= -0.1).all()
272
+
273
+ kcyclesx = torch.cat([k3x, k4x, k5x], dim=-1)
274
+ kcyclesy = torch.cat([k3y, k4y, k5y, k6y], dim=-1)
275
+ return kcyclesx, kcyclesy
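A usage sketch for the extra-feature computation (not part of the commit); the batch below contains two empty 9-node graphs, so all cycle counts come out as zero.

import torch
from extra_features import ExtraFeatures

bs, n, de = 2, 9, 5
E_t = torch.nn.functional.one_hot(torch.zeros(bs, n, n, dtype=torch.long), de).float()  # all "no edge"
noisy_data = {'E_t': E_t, 'node_mask': torch.ones(bs, n, dtype=torch.bool)}

feats = ExtraFeatures('cycles', max_n_nodes=9)
extra = feats(noisy_data)   # PlaceHolder with X: (bs, n, 3) node cycle counts, y: (bs, 5)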
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (163 Bytes). View file
 
models/__pycache__/layers.cpython-39.pyc ADDED
Binary file (1.91 kB). View file
 
models/__pycache__/transformer_model.cpython-39.pyc ADDED
Binary file (7.99 kB). View file
 
models/layers.py ADDED
@@ -0,0 +1,46 @@
+ import torch
+ import torch.nn as nn
+
+
+ class Xtoy(nn.Module):
+     def __init__(self, dx, dy):
+         """ Map node features to global features """
+         super().__init__()
+         self.lin = nn.Linear(4 * dx, dy)
+
+     def forward(self, X):
+         """ X: bs, n, dx. """
+         m = X.mean(dim=1)
+         mi = X.min(dim=1)[0]
+         ma = X.max(dim=1)[0]
+         std = X.std(dim=1)
+         z = torch.hstack((m, mi, ma, std))
+         out = self.lin(z)
+         return out
+
+
+ class Etoy(nn.Module):
+     def __init__(self, d, dy):
+         """ Map edge features to global features. """
+         super().__init__()
+         self.lin = nn.Linear(4 * d, dy)
+
+     def forward(self, E):
+         """ E: bs, n, n, de
+             Features relative to the diagonal of E could potentially be added.
+         """
+         m = E.mean(dim=(1, 2))
+         mi = E.min(dim=2)[0].min(dim=1)[0]
+         ma = E.max(dim=2)[0].max(dim=1)[0]
+         std = torch.std(E, dim=(1, 2))
+         z = torch.hstack((m, mi, ma, std))
+         out = self.lin(z)
+         return out
+
+
+ def masked_softmax(x, mask, **kwargs):
+     if mask.sum() == 0:
+         return x
+     x_masked = x.clone()
+     x_masked[mask == 0] = -float("inf")
+     return torch.softmax(x_masked, **kwargs)
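A small check of masked_softmax (not part of the commit): the masked position receives zero attention weight.

import torch
from models.layers import masked_softmax

scores = torch.tensor([[2.0, 1.0, 0.5]])
mask = torch.tensor([[1, 1, 0]])                # third position is padding
attn = masked_softmax(scores, mask, dim=-1)
print(attn)                                     # tensor([[0.7311, 0.2689, 0.0000]])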
models/transformer_model.py ADDED
@@ -0,0 +1,285 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn.modules.dropout import Dropout
6
+ from torch.nn.modules.linear import Linear
7
+ from torch.nn.modules.normalization import LayerNorm
8
+ from torch.nn import functional as F
9
+ from torch import Tensor
10
+
11
+ import utils
12
+ from diffusion import diffusion_utils
13
+ from models.layers import Xtoy, Etoy, masked_softmax
14
+
15
+
16
+ class XEyTransformerLayer(nn.Module):
17
+ """ Transformer that updates node, edge and global features
18
+ d_x: node features
19
+ d_e: edge features
20
+ dy: global features
21
+ n_head: the number of heads in the multi_head_attention
22
+ dim_feedforward: the dimension of the feedforward network model after self-attention
23
+ dropout: dropout probability. 0 to disable
24
+ layer_norm_eps: eps value in layer normalizations.
25
+ """
26
+ def __init__(self, dx: int, de: int, dy: int, n_head: int, dim_ffX: int = 2048,
27
+ dim_ffE: int = 128, dim_ffy: int = 2048, dropout: float = 0.1,
28
+ layer_norm_eps: float = 1e-5, device=None, dtype=None) -> None:
29
+ kw = {'device': device, 'dtype': dtype}
30
+ super().__init__()
31
+
32
+ self.self_attn = NodeEdgeBlock(dx, de, dy, n_head, **kw)
33
+
34
+ self.linX1 = Linear(dx, dim_ffX, **kw)
35
+ self.linX2 = Linear(dim_ffX, dx, **kw)
36
+ self.normX1 = LayerNorm(dx, eps=layer_norm_eps, **kw)
37
+ self.normX2 = LayerNorm(dx, eps=layer_norm_eps, **kw)
38
+ self.dropoutX1 = Dropout(dropout)
39
+ self.dropoutX2 = Dropout(dropout)
40
+ self.dropoutX3 = Dropout(dropout)
41
+
42
+ self.linE1 = Linear(de, dim_ffE, **kw)
43
+ self.linE2 = Linear(dim_ffE, de, **kw)
44
+ self.normE1 = LayerNorm(de, eps=layer_norm_eps, **kw)
45
+ self.normE2 = LayerNorm(de, eps=layer_norm_eps, **kw)
46
+ self.dropoutE1 = Dropout(dropout)
47
+ self.dropoutE2 = Dropout(dropout)
48
+ self.dropoutE3 = Dropout(dropout)
49
+
50
+ self.lin_y1 = Linear(dy, dim_ffy, **kw)
51
+ self.lin_y2 = Linear(dim_ffy, dy, **kw)
52
+ self.norm_y1 = LayerNorm(dy, eps=layer_norm_eps, **kw)
53
+ self.norm_y2 = LayerNorm(dy, eps=layer_norm_eps, **kw)
54
+ self.dropout_y1 = Dropout(dropout)
55
+ self.dropout_y2 = Dropout(dropout)
56
+ self.dropout_y3 = Dropout(dropout)
57
+
58
+ self.activation = F.relu
59
+
60
+ def forward(self, X: Tensor, E: Tensor, y, node_mask: Tensor):
61
+ """ Pass the input through the encoder layer.
62
+ X: (bs, n, d)
63
+ E: (bs, n, n, d)
64
+ y: (bs, dy)
65
+ node_mask: (bs, n) Mask for the src keys per batch (optional)
66
+ Output: newX, newE, new_y with the same shape.
67
+ """
68
+
69
+ newX, newE, new_y = self.self_attn(X, E, y, node_mask=node_mask)
70
+
71
+ newX_d = self.dropoutX1(newX)
72
+ X = self.normX1(X + newX_d)
73
+
74
+ newE_d = self.dropoutE1(newE)
75
+ E = self.normE1(E + newE_d)
76
+
77
+ new_y_d = self.dropout_y1(new_y)
78
+ y = self.norm_y1(y + new_y_d)
79
+
80
+ ff_outputX = self.linX2(self.dropoutX2(self.activation(self.linX1(X))))
81
+ ff_outputX = self.dropoutX3(ff_outputX)
82
+ X = self.normX2(X + ff_outputX)
83
+
84
+ ff_outputE = self.linE2(self.dropoutE2(self.activation(self.linE1(E))))
85
+ ff_outputE = self.dropoutE3(ff_outputE)
86
+ E = self.normE2(E + ff_outputE)
87
+
88
+ ff_output_y = self.lin_y2(self.dropout_y2(self.activation(self.lin_y1(y))))
89
+ ff_output_y = self.dropout_y3(ff_output_y)
90
+ y = self.norm_y2(y + ff_output_y)
91
+
92
+ return X, E, y
93
+
94
+
95
+ class NodeEdgeBlock(nn.Module):
96
+ """ Self attention layer that also updates the representations on the edges. """
97
+ def __init__(self, dx, de, dy, n_head, **kwargs):
98
+ super().__init__()
99
+ assert dx % n_head == 0, f"dx: {dx} -- nhead: {n_head}"
100
+ self.dx = dx
101
+ self.de = de
102
+ self.dy = dy
103
+ self.df = int(dx / n_head)
104
+ self.n_head = n_head
105
+
106
+ # Attention
107
+ self.q = Linear(dx, dx)
108
+ self.k = Linear(dx, dx)
109
+ self.v = Linear(dx, dx)
110
+
111
+ # FiLM E to X
112
+ self.e_add = Linear(de, dx)
113
+ self.e_mul = Linear(de, dx)
114
+
115
+ # FiLM y to E
116
+ self.y_e_mul = Linear(dy, dx) # Warning: here it's dx and not de
117
+ self.y_e_add = Linear(dy, dx)
118
+
119
+ # FiLM y to X
120
+ self.y_x_mul = Linear(dy, dx)
121
+ self.y_x_add = Linear(dy, dx)
122
+
123
+ # Process y
124
+ self.y_y = Linear(dy, dy)
125
+ self.x_y = Xtoy(dx, dy)
126
+ self.e_y = Etoy(de, dy)
127
+
128
+ # Output layers
129
+ self.x_out = Linear(dx, dx)
130
+ self.e_out = Linear(dx, de)
131
+ self.y_out = nn.Sequential(nn.Linear(dy, dy), nn.ReLU(), nn.Linear(dy, dy))
132
+
133
+ def forward(self, X, E, y, node_mask):
134
+ """
135
+ :param X: bs, n, d node features
136
+ :param E: bs, n, n, d edge features
137
+ :param y: bs, dz global features
138
+ :param node_mask: bs, n
139
+ :return: newX, newE, new_y with the same shape.
140
+ """
141
+ bs, n, _ = X.shape
142
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
143
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
144
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
145
+
146
+ # 1. Map X to keys and queries
147
+ Q = self.q(X) * x_mask # (bs, n, dx)
148
+ K = self.k(X) * x_mask # (bs, n, dx)
149
+ diffusion_utils.assert_correctly_masked(Q, x_mask)
150
+ # 2. Reshape to (bs, n, n_head, df) with dx = n_head * df
151
+
152
+ Q = Q.reshape((Q.size(0), Q.size(1), self.n_head, self.df))
153
+ K = K.reshape((K.size(0), K.size(1), self.n_head, self.df))
154
+
155
+ Q = Q.unsqueeze(2) # (bs, n, 1, n_head, df)
156
+ K = K.unsqueeze(1) # (bs, 1, n, n_head, df)
157
+
158
+ # Compute unnormalized attentions. Y is (bs, n, n, n_head, df)
159
+ Y = Q * K
160
+ Y = Y / math.sqrt(Y.size(-1))
161
+ diffusion_utils.assert_correctly_masked(Y, (e_mask1 * e_mask2).unsqueeze(-1))
162
+
163
+ E1 = self.e_mul(E) * e_mask1 * e_mask2 # bs, n, n, dx
164
+ E1 = E1.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df))
165
+
166
+ E2 = self.e_add(E) * e_mask1 * e_mask2 # bs, n, n, dx
167
+ E2 = E2.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df))
168
+
169
+ # Incorporate edge features to the self attention scores.
170
+ Y = Y * (E1 + 1) + E2 # (bs, n, n, n_head, df)
171
+
172
+ # Incorporate y to E
173
+ newE = Y.flatten(start_dim=3) # bs, n, n, dx
174
+ ye1 = self.y_e_add(y).unsqueeze(1).unsqueeze(1) # bs, 1, 1, de
175
+ ye2 = self.y_e_mul(y).unsqueeze(1).unsqueeze(1)
176
+ newE = ye1 + (ye2 + 1) * newE
177
+
178
+ # Output E
179
+ newE = self.e_out(newE) * e_mask1 * e_mask2 # bs, n, n, de
180
+ diffusion_utils.assert_correctly_masked(newE, e_mask1 * e_mask2)
181
+
182
+ # Compute attentions. attn is still (bs, n, n, n_head, df)
183
+ softmax_mask = e_mask2.expand(-1, n, -1, self.n_head) # bs, n, n, n_head
184
+ attn = masked_softmax(Y, softmax_mask, dim=2) # bs, n, n, n_head
185
+
186
+ V = self.v(X) * x_mask # bs, n, dx
187
+ V = V.reshape((V.size(0), V.size(1), self.n_head, self.df))
188
+ V = V.unsqueeze(1) # (bs, 1, n, n_head, df)
189
+
190
+ # Compute weighted values
191
+ weighted_V = attn * V
192
+ weighted_V = weighted_V.sum(dim=2)
193
+
194
+ # Send output to input dim
195
+ weighted_V = weighted_V.flatten(start_dim=2) # bs, n, dx
196
+
197
+ # Incorporate y to X
198
+ yx1 = self.y_x_add(y).unsqueeze(1)
199
+ yx2 = self.y_x_mul(y).unsqueeze(1)
200
+ newX = yx1 + (yx2 + 1) * weighted_V
201
+
202
+ # Output X
203
+ newX = self.x_out(newX) * x_mask
204
+ diffusion_utils.assert_correctly_masked(newX, x_mask)
205
+
206
+ # Process y based on X and E
207
+ y = self.y_y(y)
208
+ e_y = self.e_y(E)
209
+ x_y = self.x_y(X)
210
+ new_y = y + x_y + e_y
211
+ new_y = self.y_out(new_y) # bs, dy
212
+
213
+ return newX, newE, new_y
214
+
215
+
216
+ class GraphTransformer(nn.Module):
217
+ """
218
+ n_layers : int -- number of layers
219
+ dims : dict -- contains dimensions for each feature type
220
+ """
221
+ def __init__(self, n_layers: int, input_dims: dict, cond_dims: int, hidden_mlp_dims: dict, hidden_dims: dict,
222
+ output_dims: dict, act_fn_in: nn.ReLU(), act_fn_out: nn.ReLU()):
223
+ super().__init__()
224
+ self.n_layers = n_layers
225
+ self.out_dim_X = output_dims['X']
226
+ self.out_dim_E = output_dims['E']
227
+ self.out_dim_y = output_dims['y']
228
+
229
+ self.mlp_in_X = nn.Sequential(nn.Linear(input_dims['X'] + cond_dims, hidden_mlp_dims['X']), act_fn_in,
230
+ nn.Linear(hidden_mlp_dims['X'], hidden_dims['dx']), act_fn_in)
231
+
232
+ self.mlp_in_E = nn.Sequential(nn.Linear(input_dims['E'] + cond_dims, hidden_mlp_dims['E']), act_fn_in,
233
+ nn.Linear(hidden_mlp_dims['E'], hidden_dims['de']), act_fn_in)
234
+
235
+ self.mlp_in_y = nn.Sequential(nn.Linear(input_dims['y'], hidden_mlp_dims['y']), act_fn_in,
236
+ nn.Linear(hidden_mlp_dims['y'], hidden_dims['dy']), act_fn_in)
237
+
238
+ self.tf_layers = nn.ModuleList([XEyTransformerLayer(dx=hidden_dims['dx'],
239
+ de=hidden_dims['de'],
240
+ dy=hidden_dims['dy'],
241
+ n_head=hidden_dims['n_head'],
242
+ dim_ffX=hidden_dims['dim_ffX'],
243
+ dim_ffE=hidden_dims['dim_ffE'])
244
+ for i in range(n_layers)])
245
+
246
+ self.mlp_out_X = nn.Sequential(nn.Linear(hidden_dims['dx'], hidden_mlp_dims['X']), act_fn_out,
247
+ nn.Linear(hidden_mlp_dims['X'], output_dims['X']))
248
+
249
+ self.mlp_out_E = nn.Sequential(nn.Linear(hidden_dims['de'], hidden_mlp_dims['E']), act_fn_out,
250
+ nn.Linear(hidden_mlp_dims['E'], output_dims['E']))
251
+
252
+ self.mlp_out_y = nn.Sequential(nn.Linear(hidden_dims['dy'], hidden_mlp_dims['y']), act_fn_out,
253
+ nn.Linear(hidden_mlp_dims['y'], output_dims['y']))
254
+
255
+ def forward(self, X, E, y, node_mask):
256
+ bs, n = X.shape[0], X.shape[1]
257
+
258
+ diag_mask = torch.eye(n)
259
+ diag_mask = ~diag_mask.type_as(E).bool()
260
+ diag_mask = diag_mask.unsqueeze(0).unsqueeze(-1).expand(bs, -1, -1, -1)
261
+
262
+ X_to_out = X[..., :self.out_dim_X]
263
+ E_to_out = E[..., :self.out_dim_E]
264
+ y_to_out = y[..., :self.out_dim_y]
265
+
266
+ new_E = self.mlp_in_E(E)
267
+ new_E = (new_E + new_E.transpose(1, 2)) / 2
268
+
269
+ after_in = utils.PlaceHolder(X=self.mlp_in_X(X), E=new_E, y=self.mlp_in_y(y)).mask(node_mask)
270
+ X, E, y = after_in.X, after_in.E, after_in.y
271
+
272
+ for layer in self.tf_layers:
273
+ X, E, y = layer(X, E, y, node_mask)
274
+
275
+ X = self.mlp_out_X(X)
276
+ E = self.mlp_out_E(E)
277
+ y = self.mlp_out_y(y)
278
+
279
+ X = (X + X_to_out)
280
+ E = (E + E_to_out) * diag_mask
281
+ y = y + y_to_out
282
+
283
+ E = 1/2 * (E + torch.transpose(E, 1, 2))
284
+
285
+ return utils.PlaceHolder(X=X, E=E, y=y).mask(node_mask)
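An instantiation sketch (not part of the commit). All dimension values are illustrative; in the demo they come from config.yaml and the dataset infos, and the conditioning channels (cond_dims) are assumed to be concatenated to X and E before the call.

import torch
import torch.nn as nn
from models.transformer_model import GraphTransformer

input_dims = {'X': 8, 'E': 5, 'y': 12}
output_dims = {'X': 8, 'E': 5, 'y': 1}
hidden_mlp_dims = {'X': 128, 'E': 64, 'y': 128}
hidden_dims = {'dx': 256, 'de': 64, 'dy': 64, 'n_head': 8, 'dim_ffX': 256, 'dim_ffE': 128}

model = GraphTransformer(n_layers=2, input_dims=input_dims, cond_dims=4,
                         hidden_mlp_dims=hidden_mlp_dims, hidden_dims=hidden_dims,
                         output_dims=output_dims, act_fn_in=nn.ReLU(), act_fn_out=nn.ReLU())

bs, n = 2, 9
X = torch.randn(bs, n, input_dims['X'] + 4)         # node features + 4 conditioning channels
E = torch.randn(bs, n, n, input_dims['E'] + 4)      # edge features + 4 conditioning channels
y = torch.randn(bs, input_dims['y'])
node_mask = torch.ones(bs, n, dtype=torch.bool)
out = model(X, E, y, node_mask)                     # PlaceHolder with denoised X, E, y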
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ networkx==2.8.7
+ numpy==1.23
+ omegaconf==2.3.0
+ pytorch_lightning==2.0.4
+ torch_geometric==2.3.1
+ torchmetrics==0.11.4
+ tqdm==4.65.0
+ torch==2.3.0
+ torchvision==0.18.0
+ torchaudio==2.3.0
+ gradio==4.32.0
+ wandb==0.15.4
+ sentence-transformers==2.6.1
+ PyGSP==0.5.1
+ pyemd==1.0.0
utils.py ADDED
@@ -0,0 +1,137 @@
1
+ import os
2
+ import torch_geometric.utils
3
+ from omegaconf import OmegaConf, open_dict
4
+ from torch_geometric.utils import to_dense_adj, to_dense_batch
5
+ import torch
6
+ import omegaconf
7
+ import wandb
8
+
9
+ def create_folders(args):
10
+ try:
11
+ # os.makedirs('checkpoints')
12
+ os.makedirs('graphs')
13
+ os.makedirs('chains')
14
+ except OSError:
15
+ pass
16
+
17
+ try:
18
+ # os.makedirs('checkpoints/' + args.general.name)
19
+ os.makedirs('graphs/' + args.general.name)
20
+ os.makedirs('chains/' + args.general.name)
21
+ except OSError:
22
+ pass
23
+
24
+
25
+ def normalize(X, E, y, norm_values, norm_biases, node_mask):
26
+ X = (X - norm_biases[0]) / norm_values[0]
27
+ E = (E - norm_biases[1]) / norm_values[1]
28
+ y = (y - norm_biases[2]) / norm_values[2]
29
+
30
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
31
+ E[diag] = 0
32
+
33
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask)
34
+
35
+
36
+ def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False):
37
+ """
38
+ X : node features
39
+ E : edge features
40
+ y : global features
41
+ norm_values : [norm value X, norm value E, norm value y]
42
+ norm_biases : same order
43
+ node_mask
44
+ """
45
+ X = (X * norm_values[0] + norm_biases[0])
46
+ E = (E * norm_values[1] + norm_biases[1])
47
+ y = y * norm_values[2] + norm_biases[2]
48
+
49
+ return PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse)
50
+
51
+
52
+ def to_dense(x, edge_index, edge_attr, batch):
53
+ X, node_mask = to_dense_batch(x=x, batch=batch)
54
+ # node_mask = node_mask.float()
55
+ edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr)
56
+ # TODO: carefully check if setting node_mask as a bool breaks the continuous case
57
+ max_num_nodes = X.size(1)
58
+ E = to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes)
59
+ E = encode_no_edge(E)
60
+
61
+ return PlaceHolder(X=X, E=E, y=None), node_mask
62
+
63
+
64
+ def encode_no_edge(E):
65
+ assert len(E.shape) == 4
66
+ if E.shape[-1] == 0:
67
+ return E
68
+ no_edge = torch.sum(E, dim=3) == 0
69
+ first_elt = E[:, :, :, 0]
70
+ first_elt[no_edge] = 1
71
+ E[:, :, :, 0] = first_elt
72
+ diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1)
73
+ E[diag] = 0
74
+ return E
75
+
76
+
77
+ def update_config_with_new_keys(cfg, saved_cfg):
78
+ saved_general = saved_cfg.general
79
+ saved_train = saved_cfg.train
80
+ saved_model = saved_cfg.model
81
+
82
+ for key, val in saved_general.items():
83
+ OmegaConf.set_struct(cfg.general, True)
84
+ with open_dict(cfg.general):
85
+ if key not in cfg.general.keys():
86
+ setattr(cfg.general, key, val)
87
+
88
+ OmegaConf.set_struct(cfg.train, True)
89
+ with open_dict(cfg.train):
90
+ for key, val in saved_train.items():
91
+ if key not in cfg.train.keys():
92
+ setattr(cfg.train, key, val)
93
+
94
+ OmegaConf.set_struct(cfg.model, True)
95
+ with open_dict(cfg.model):
96
+ for key, val in saved_model.items():
97
+ if key not in cfg.model.keys():
98
+ setattr(cfg.model, key, val)
99
+ return cfg
100
+
101
+
102
+ class PlaceHolder:
103
+ def __init__(self, X, E, y):
104
+ self.X = X
105
+ self.E = E
106
+ self.y = y
107
+
108
+ def type_as(self, x: torch.Tensor):
109
+ """ Changes the device and dtype of X, E, y. """
110
+ self.X = self.X.type_as(x)
111
+ self.E = self.E.type_as(x)
112
+ self.y = self.y.type_as(x)
113
+ return self
114
+
115
+ def mask(self, node_mask, collapse=False):
116
+ x_mask = node_mask.unsqueeze(-1) # bs, n, 1
117
+ e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1
118
+ e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1
119
+
120
+ if collapse:
121
+ self.X = torch.argmax(self.X, dim=-1)
122
+ self.E = torch.argmax(self.E, dim=-1)
123
+
124
+ self.X[node_mask == 0] = - 1
125
+ self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1
126
+ else:
127
+ self.X = self.X * x_mask
128
+ self.E = self.E * e_mask1 * e_mask2
129
+ assert torch.allclose(self.E, torch.transpose(self.E, 1, 2))
130
+ return self
131
+
132
+ def setup_wandb(cfg):
133
+ config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True)
134
+ kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict,
135
+ 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb}
136
+ wandb.init(**kwargs)
137
+ wandb.save('*.txt')
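A sketch of PlaceHolder.mask with collapse=True (not part of the commit), showing how padded nodes and edges are labelled -1 after the per-class probabilities are collapsed to integer labels.

import torch
from utils import PlaceHolder

bs, n = 1, 3
X = torch.softmax(torch.randn(bs, n, 4), dim=-1)        # per-node class probabilities
E = torch.softmax(torch.randn(bs, n, n, 2), dim=-1)     # per-edge class probabilities
E = (E + E.transpose(1, 2)) / 2                         # keep the edge tensor symmetric
node_mask = torch.tensor([[True, True, False]])         # last node is padding

dense = PlaceHolder(X=X, E=E, y=torch.zeros(bs, 0)).mask(node_mask, collapse=True)
print(dense.X)   # integer node labels, padded node set to -1
print(dense.E)   # integer edge labels, padded rows / columns set to -1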