Option Pricing using Neural Networks

I recently wrote an article for Dorian Trader on using a neural network to replicate option pricing models. The idea behind using neural networks in option pricing is that, while the original numerical model might be slow, the neural network (once trained) is lightning fast at pricing options.

In that article, I fitted a neural network to the vanilla Black-Scholes formulae as a proof-of-concept. I used ChatGPT to generate an initial piece of code, before tweaking it myself to improve the accuracy and configure the output how I wanted it. So this exercise is also an interesting demonstration of AI-assisted coding.

You can read the full article here

Below I’ve attached the Python code that I used. The code trains the neural network on your Nvidia GPU if you have one; otherwise it uses the CPU.

There are many parameters you can alter, including the number of options used in fitting, the number and size of the layers in the neural network, and the number of epochs to train for.

An interesting exercise would be to try to modify the code to work with more complex options, such as barrier options or American options.

Python Code

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Black-Scholes formula for European call options
def bs_price(S, K, T, r, sigma):
    """Return the Black-Scholes price of a European call option.

    Args:
        S: Spot price tensor.
        K: Strike price tensor.
        T: Time to expiry tensor (same time unit as ``r`` and ``sigma``).
        r: Continuously compounded risk-free rate tensor.
        sigma: Volatility tensor.

    All arguments are expected to be tensors that broadcast against each
    other (the script in this file passes ``(N, 1)`` columns).

    Returns:
        A tensor of call prices with the broadcast shape of the inputs.

    Entries whose total volatility ``sigma * sqrt(T)`` is zero (at expiry,
    or with zero volatility) have no randomness left, so the closed-form
    d1/d2 would divide by zero and can become NaN. For those entries the
    discounted intrinsic value ``max(S - K * exp(-r * T), 0)`` is returned
    instead.
    """
    from torch.distributions import Normal

    total_vol = sigma * torch.sqrt(T)
    discounted_strike = K * torch.exp(-r * T)

    # Replace a zero denominator with a harmless placeholder; the values
    # computed from it are discarded by the final torch.where.
    degenerate = total_vol <= 0
    safe_vol = torch.where(degenerate, torch.ones_like(total_vol), total_vol)

    d1 = (torch.log(S / K) + (r + 0.5 * sigma**2) * T) / safe_vol
    d2 = d1 - safe_vol
    norm = Normal(0., 1.)
    price = S * norm.cdf(d1) - discounted_strike * norm.cdf(d2)

    intrinsic = torch.clamp(S - discounted_strike, min=0)
    return torch.where(degenerate, intrinsic, price)

# Generate dataset
# Draw N random parameter sets as (N, 1) columns. torch.rand samples from
# [0, 1), so the scaled ranges are: S and K in [1, 101), T in [0.01, 2.01),
# r in [0, 0.1) and sigma in [0.01, 0.51).
N = 600_000
S = torch.rand(N, 1) * 100 + 1
K = torch.rand(N, 1) * 100 + 1
T = torch.rand(N, 1) * 2 + 0.01
r = torch.rand(N, 1) * 0.1
sigma = torch.rand(N, 1) * 0.5 + 0.01
# Closed-form call prices used as the regression targets.
prices = bs_price(S, K, T, r, sigma)

# Normalise the problem: divide strike and price by the spot (so S becomes 1)
# and fold the maturity into the volatility via sigma * sqrt(T).
# NOTE(review): T2 (all ones) is computed but not referenced again in the
# visible code; the feature matrix below uses the raw T and r columns.
K2 = K / S
T2 = torch.ones_like(K)
sigma2 = sigma * torch.sqrt(T)
# Discount factor exp(-r * T).
DF = torch.exp(-r * T)
prices2 = prices / S

# Feature columns, in order: K/S, T, r, sigma*sqrt(T), exp(-r*T).
# Target: price / S.
X = torch.cat([K2, T, r, sigma2, DF], dim=1)
y = prices2

# First 80% of the rows are used for training, the remaining 20% for testing.
train_size = int(0.8 * N)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Mini-batch loader over the training split; shuffle=True asks the
# DataLoader to reshuffle the rows.
batch_size = 512
dataset = TensorDataset(X_train, y_train)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Network configuration
# hidden_size: width of every hidden nn.Linear layer created by build_model.
# num_layers: number of hidden Linear+ReLU stages; the final Linear layer
# that maps to the single output is added on top of these.
hidden_size = 256
num_layers = 6

def build_model(input_dim, hidden_size, num_layers):
    """Assemble a fully connected ReLU network with a single output.

    Args:
        input_dim: Number of input features.
        hidden_size: Width of every hidden layer.
        num_layers: Number of hidden Linear+ReLU stages. At least one stage
            is always created, even when this is below 1.

    Returns:
        An ``nn.Sequential`` of the hidden stages followed by a final
        ``nn.Linear(hidden_size, 1)``.
    """
    stages = []
    in_features = input_dim
    # The first stage consumes the raw features; later ones are square.
    for _ in range(max(num_layers, 1)):
        stages.append(nn.Linear(in_features, hidden_size))
        stages.append(nn.ReLU())
        in_features = hidden_size
    stages.append(nn.Linear(in_features, 1))
    return nn.Sequential(*stages)

class BSNet(nn.Module):
    """Feed-forward network wrapping the stack returned by ``build_model``.

    The network takes 5 input features and its depth and width come from
    the module-level ``num_layers`` and ``hidden_size`` settings.
    """

    def __init__(self):
        super().__init__()
        # Five input features; one output per row.
        self.model = build_model(
            input_dim=5,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )

    def forward(self, x):
        """Run ``x`` through the wrapped sequential model."""
        output = self.model(x)
        return output

# Initialize
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = BSNet().to(device)
# Adam with a small weight decay; mean squared error as the objective.
optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-6)
criterion = nn.MSELoss()

# Training loop
epochs = 200
for epoch in range(1, epochs + 1):
    net.train()
    running_loss = 0.0
    for xb, yb in loader:
        # Batches come from CPU tensors, so move each one to the device.
        xb, yb = xb.to(device), yb.to(device)
        preds = net(xb)
        loss = criterion(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by the batch size so that the epoch
        # figure below is a per-sample mean even if the last batch is short.
        running_loss += loss.item() * xb.size(0)
    epoch_loss = running_loss / train_size
    # Only report every 50th epoch.
    if epoch % 50 == 0:
        print(f"Epoch {epoch}/{epochs}, Training MSE: {epoch_loss:.10f}")

def eval_test():
    """Return the loss of ``net`` on the held-out test split as a float.

    The network is switched to eval mode and gradients are disabled; the
    loss is computed with the module-level ``criterion`` over the whole
    test set in a single forward pass.
    """
    net.eval()
    with torch.no_grad():
        features = X_test.to(device)
        targets = y_test.to(device)
        predictions = net(features)
        test_loss_tensor = criterion(predictions, targets)
    return test_loss_tensor.item()

# Evaluate on the held-out split once the training loop has finished.
test_loss = eval_test()
print(f"\nTest MSE after {epochs} epochs: {test_loss:.10f}")

def price_option(S_val, K_val, T_val, r_val, sigma_val):
    """Print the network price and the closed-form price of one call option.

    Args:
        S_val: Spot price.
        K_val: Strike price.
        T_val: Time to expiry.
        r_val: Risk-free rate.
        sigma_val: Volatility.

    The scalar inputs are converted to the same five features used for
    training (K/S, T, r, sigma*sqrt(T), exp(-r*T)); the network output is
    multiplied by ``S_val`` to undo the price/S scaling. Nothing is
    returned; both prices are printed on one line.
    """
    net.eval()
    with torch.no_grad():
        # Derived features, computed exactly as torch scalars.
        scaled_vol = sigma_val * torch.sqrt(torch.tensor(T_val))
        discount = torch.exp(torch.tensor(-r_val * T_val))

        raw_row = torch.tensor(
            [[S_val, K_val, T_val, r_val, sigma_val]], device=device
        )
        feature_row = torch.tensor(
            [[K_val / S_val, T_val, r_val, scaled_vol, discount]],
            device=device,
        )

        network_price = net(feature_row).cpu().item() * S_val

        # Split the raw row into five (1, 1) columns for bs_price.
        columns = [raw_row[:, j:j + 1] for j in range(5)]
        formula_price = bs_price(*columns).cpu().item()
    print(network_price, formula_price)

# Spot-check a few hand-picked options. Each call prints the network price
# followed by the closed-form price. The second and third examples use S and
# K above the [1, 101) range sampled for training; the network is fed K/S
# rather than S or K themselves.
print("\nTesting on some options:")
price_option(55, 60, 1.2, .03, .12)
price_option(340, 330, 1.4, .02, .15)
price_option(131, 131, 0.6, .02, .1)