Activity 3: Implementing a graph convolutional neural network

The cell below looks up a molecule by name on PubChem and converts it into a graph: a one-hot node feature matrix (nodes) and an adjacency matrix (edges).

import requests
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.rdBase import BlockLogs
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity 
import numpy as np

# turn off rdkit warnings; the BlockLogs object is bound to a module-level name
# so it is not discarded right away (presumably logging resumes once the object
# is released -- confirm against the RDKit documentation)
block = BlockLogs()

# use SVGs and make molecules fill image rather than all be same scale
# NOTE(review): this relies on `IPythonConsole` being reachable as an attribute
# of the imported `Draw` package -- confirm for the RDKit version in use
Draw.IPythonConsole.ipython_useSVG = True
Draw.IPythonConsole.drawOptions.drawMolsSameScale = False

def get_cids(text):
    """Search PubChem by compound name and return the matching CIDs.

    Args:
        text: Compound name to look up (e.g. ``'octane'``).

    Returns:
        A list of CID strings in the order PubChem returned them, or ``None``
        when the lookup fails or yields no identifiers.
    """
    # PUG REST endpoint: compound name -> CIDs as whitespace-separated text.
    url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/cids/TXT'.format(text)
    response = requests.get(url)
    # An error response (e.g. 404 for an unknown name) carries a status
    # message in its body; without this check it would be split into tokens
    # and returned as if it were a list of CIDs.
    if not response.ok:
        return None
    cids = response.text.split()
    if not cids:
        return None
    return cids
def get_record(cid):
    """Fetch the PubChem record for a CID and return it as an RDKit molecule.

    Args:
        cid: PubChem compound identifier (string or integer).

    Returns:
        The result of ``Chem.MolFromMolBlock`` applied to the downloaded SDF
        text (presumably ``None`` when RDKit cannot parse it -- confirm
        against the RDKit documentation).
    """
    # PUG REST endpoint: CID -> full compound record in SDF format.
    url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/record/SDF'.format(cid)
    response = requests.get(url)
    mol = Chem.MolFromMolBlock(response.text)
    return mol

def get_molecule(text, n_results=1):
    """Search PubChem by name and return the best matching record(s).

    Args:
        text: Compound name to look up.
        n_results: Number of matches to fetch. Defaults to 1.

    Returns:
        ``None`` when the search finds nothing. Otherwise a single molecule
        when ``n_results == 1``, or a list with up to ``n_results`` molecules
        (one per CID, in search order).
    """
    cids = get_cids(text)
    if cids is None:
        return None
    # A single result is returned bare rather than wrapped in a list.
    if n_results == 1:
        return get_record(cids[0])
    return [get_record(cid) for cid in cids[:n_results]]
def str2graph(text):
    """Convert text to graph via pubchem look-up.

    Args:
        text: Compound name to look up.

    Returns:
        A tuple ``(nodes, adj)``:

        * ``nodes`` -- array of shape ``(N, 100)``; row ``i`` is a one-hot
          vector with a 1 in the column given by atom ``i``'s atomic number.
        * ``adj`` -- symmetric ``(N, N)`` array with a 1 wherever two atoms
          share a bond. Bond order is not encoded.

        ``N`` counts every atom after hydrogens have been added.

    Raises:
        ValueError: If no molecule could be retrieved for ``text``.
    """
    m = get_molecule(text)
    if m is None:
        # Fail with a clear message instead of passing None into RDKit.
        raise ValueError('No molecule found for {!r}'.format(text))
    # Add hydrogens so that they get their own nodes in the graph.
    m = Chem.AddHs(m)

    N = m.GetNumAtoms()
    nodes = np.zeros((N, 100))
    for atom in m.GetAtoms():
        nodes[atom.GetIdx(), atom.GetAtomicNum()] = 1

    adj = np.zeros((N, N))
    for bond in m.GetBonds():
        u = bond.GetBeginAtomIdx()
        v = bond.GetEndAtomIdx()
        # The graph is undirected, so fill both triangles.
        adj[u, v] = 1
        adj[v, u] = 1
    return nodes, adj
# Build the example graph for octane: `nodes` holds the one-hot atom features
# and `edges` the adjacency matrix returned by str2graph.
nodes, edges = str2graph('octane')

Computing the convolution

Recall the equation for the GCN, written in Einstein notation (the repeated indices j and k are summed over):

\[ v_{il} = \sigma\left(\frac{1}{d_i}e_{ij}v_{jk}w_{lk}\right) \]

See if you can fill in the missing details in the code below. Think about what the new shape should be!

# number of output features per node
F = 4

# one-hot vector size (100) by feature number (F)
weights = np.random.randn(100, F)
# Element-wise activation. np.tanh works on whole arrays, whereas math.tanh
# (what np.math.tanh resolved to) only accepts scalars, and the np.math alias
# is no longer available in recent NumPy releases.
sigma = np.tanh
# Column sums of the adjacency matrix: the number of neighbours of each node.
degree = np.sum(edges, axis=0)
# TODO(exercise): replace the placeholder below with the GCN update, combining
# degree, edges, nodes and weights and applying sigma to the result.
new_nodes = ...