Any floating-point tensor in PyTorch can optionally track the operations performed on it. When you call .backward() on a result computed from that tensor, PyTorch walks back through those operations and computes the gradients automatically.
This is called automatic differentiation and it's the reason deep learning works at scale. You never have to compute derivatives by hand.
import torch
# Build a scalar tensor; requires_grad=True asks autograd to record the
# operations applied to it so they can be differentiated later.
x = torch.tensor(3.0, requires_grad=True)

# Forward pass: evaluate the polynomial y = x^2 + 2x + 1 at x = 3.
y = x ** 2 + 2 * x + 1

# Backward pass: differentiate y with respect to x; the result lands in
# x.grad. By hand, dy/dx = 2x + 2, which is 2(3) + 2 = 8 at x = 3.
y.backward()

# Report the input, the output, and the gradient, one per line.
print(
    f"x = {x.item()}",
    f"y = x^2 + 2x + 1 = {y.item()}",
    f"dy/dx = 2x + 2 = {x.grad.item()}",  # 8.0
    sep="\n",
)
# Nothing above spelled out the derivative in code -- autograd produced it.