Activity 3: Implementing a graph convolutional neural network
The cell below converts a name of a molecule into a graph (nodes, edges).
import requests
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.rdBase import BlockLogs
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity
import numpy as np
# turn off rdkit warnings
block = BlockLogs()
# use SVGs and make molecules fill image rather than all be same scale
Draw.IPythonConsole.ipython_useSVG = True
Draw.IPythonConsole.drawOptions.drawMolsSameScale = False
def get_cids(text):
"""
Search pubchem and return best matching record
"""
url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/cids/TXT'.format(text)
response = requests.get(url)
cids = response.text.split()
if len(cids) == 0:
return None
else:
return cids
def get_record(cid):
"""
Get pubchem record for a given cid and returns molecule as rdkit
"""
url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/record/SDF'.format(cid)
response = requests.get(url)
mol = Chem.MolFromMolBlock(response.text)
return mol
def get_molecule(text, n_results=1):
"""
Search pubchem and return best matching record
"""
cids = get_cids(text)
if cids is None:
return None
else:
if n_results == 1:
return get_record(cids[0])
else:
return [get_record(cid) for cid in cids[:n_results]]
def str2graph(text):
"""Convert text to graph via pubchem look-up
"""
m = get_molecule(text)
m = Chem.AddHs(m)
order_string = {
Chem.rdchem.BondType.SINGLE: 1,
Chem.rdchem.BondType.DOUBLE: 2,
Chem.rdchem.BondType.TRIPLE: 3,
Chem.rdchem.BondType.AROMATIC: 4,
}
N = len(list(m.GetAtoms()))
nodes = np.zeros((N, 100))
for i in m.GetAtoms():
nodes[i.GetIdx(), i.GetAtomicNum()] = 1
adj = np.zeros((N, N))
for j in m.GetBonds():
u = min(j.GetBeginAtomIdx(), j.GetEndAtomIdx())
v = max(j.GetBeginAtomIdx(), j.GetEndAtomIdx())
adj[u, v] = 1
adj[v, u] = 1
return nodes, adj
nodes, edges = str2graph('octane')
Computing the convolution
Recall the equation for the GCN:
\[
v_{il} = \sigma\left(\frac{1}{d_i}e_{ij}v_{jk}w_{lk}\right)
\]
See if you can fill in the missing details in the code below. Think about what the new shape should be!
F = 4
# one-hot vector size (100) by feature number (F)
weights = np.random.randn(100, F)
sigma = np.math.tanh
degree = np.sum(edges, axis=0)
new_nodes = ....