Data validation with Pydantic
Install
bunpy add pydantic pydantic-settings
Basic model
Define a model by subclassing BaseModel. Every annotated field is validated when you construct an instance:
from pydantic import BaseModel
# A basic user record; every annotated field is validated on construction.
class User(BaseModel):
    id: int
    name: str
    email: str
    is_active: bool = True  # optional: defaults to True when omitted
user = User(id=1, name="Alice", email="alice@example.com")
print(user.id) # 1
print(user.is_active) # True
# Pydantic coerces compatible types
user2 = User(id="42", name="Bob", email="bob@example.com")
print(type(user2.id)) # <class 'int'>
Invalid data raises ValidationError with field-level details:
from pydantic import BaseModel, ValidationError
# Minimal model used to demonstrate a validation failure.
class User(BaseModel):
    id: int  # a non-numeric string here fails validation
    email: str  # plain str: any string is accepted, no e-mail format check
try:
User(id="not-a-number", email="bad")
except ValidationError as exc:
print(exc.error_count(), "error(s)")
for error in exc.errors():
print(error["loc"], error["msg"])
Field validators
Use @field_validator for custom per-field logic:
from pydantic import BaseModel, field_validator, EmailStr
# Signup payload with custom per-field checks on username and password.
class SignupRequest(BaseModel):
    username: str
    email: str
    password: str

    @field_validator("username")
    @classmethod
    def username_must_be_lowercase(cls, value: str) -> str:
        """Return the username unchanged, rejecting any uppercase characters."""
        if value != value.lower():
            raise ValueError("username must be all lowercase")
        return value

    @field_validator("password")
    @classmethod
    def password_min_length(cls, value: str) -> str:
        """Return the password unchanged, rejecting fewer than 8 characters."""
        if len(value) < 8:
            raise ValueError("password must be at least 8 characters")
        return value
req = SignupRequest(username="alice", email="alice@example.com", password="hunter42")
print(req.username)
model_validator for cross-field validation
With mode="after", @model_validator runs after all fields have been validated and populated, and lets you check relationships between them:
from pydantic import BaseModel, model_validator
from datetime import date
# A date interval whose end must fall strictly after its start.
class DateRange(BaseModel):
    start: date
    end: date

    @model_validator(mode="after")
    def end_must_be_after_start(self) -> "DateRange":
        """Return the validated instance, rejecting an end on or before start."""
        if self.end <= self.start:
            raise ValueError("end date must be after start date")
        return self
r = DateRange(start=date(2024, 1, 1), end=date(2024, 12, 31))
print(r.start, "->", r.end)
Nested models
Models compose naturally:
from pydantic import BaseModel
from typing import Optional
# Postal address; used as a nested field inside other models.
class Address(BaseModel):
    street: str
    city: str
    country: str
    postal_code: Optional[str] = None  # may be omitted
# Company record that embeds an Address model as a field.
class Company(BaseModel):
    name: str
    address: Address  # nested model field
    employee_count: int
company = Company(
name="Acme Corp",
address={"street": "123 Main St", "city": "Springfield", "country": "US"},
employee_count=250,
)
print(company.address.city) # Springfield
Serialization
Convert models to dicts and JSON with model_dump and model_dump_json:
from pydantic import BaseModel
from datetime import datetime
from typing import Optional
# Blog post used to demonstrate model_dump / model_dump_json.
class Post(BaseModel):
    id: int
    title: str
    body: str
    published_at: Optional[datetime] = None  # None when not supplied
post = Post(id=1, title="Hello", body="World", published_at=datetime.now())
# dict - by_alias and exclude_none are useful for API output
d = post.model_dump()
print(d)
# JSON string
j = post.model_dump_json(indent=2)
print(j)
# exclude sensitive fields
safe = post.model_dump(exclude={"body"})
print(safe)
Deserialize from a dict or JSON string:
from pydantic import BaseModel
from datetime import datetime
# Blog post used to demonstrate model_validate / model_validate_json.
class Post(BaseModel):
    id: int
    title: str
    published_at: datetime  # the examples below pass ISO 8601 strings for this field
# from dict
post = Post.model_validate({"id": 1, "title": "Hi", "published_at": "2024-06-01T12:00:00"})
# from JSON string
post2 = Post.model_validate_json('{"id":2,"title":"Bye","published_at":"2024-07-01T00:00:00"}')
print(post2.published_at.year) # 2024
JSON schema generation
Pydantic can emit a JSON Schema for your models - useful for documentation or validation on the client side:
from pydantic import BaseModel, Field
from typing import Optional
import json
# Request model whose Field constraints are carried into the generated JSON schema.
# (Documented with comments rather than a docstring so the schema output is unchanged.)
class CreateUserRequest(BaseModel):
    username: str = Field(min_length=3, max_length=32, description="Unique username")
    email: str = Field(description="User's email address")
    age: Optional[int] = Field(default=None, ge=0, le=150)  # optional, bounded 0..150
schema = CreateUserRequest.model_json_schema()
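print(json.dumps(schema, indent=2))
Output (abridged - the real output also includes per-property "title" and "description" entries, the bounds on age, and its null default):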
{
"title": "CreateUserRequest",
"type": "object",
"properties": {
"username": {"type": "string", "minLength": 3, "maxLength": 32},
"email": {"type": "string"},
"age": {"anyOf": [{"type": "integer"}, {"type": "null"}]}
},
"required": ["username", "email"]
}
Settings from environment variables
BaseSettings reads values from environment variables (and .env files via python-dotenv):
bunpy add pydantic-settings python-dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
# Application settings populated from environment variables and the .env file
# (e.g. DATABASE_URL, SECRET_KEY, DEBUG, ALLOWED_HOSTS).
class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # No defaults: these two must be supplied by the environment or .env file.
    database_url: str = Field(description="Postgres connection string")
    secret_key: str = Field(min_length=32)

    # Optional settings with defaults.
    debug: bool = False
    max_connections: int = 10
    allowed_hosts: list[str] = ["localhost"]
settings = Settings()
print(settings.database_url)
print(settings.debug)
Create a .env file:
DATABASE_URL=postgresql://user:pass@localhost/mydb
SECRET_KEY=supersecretkey1234567890abcdef1234
DEBUG=true
ALLOWED_HOSTS=["localhost","example.com"]
Run:
bunpy app.py
Discriminated unions
When a field can hold one of several model types, use a Literal discriminator so Pydantic picks the right model at parse time:
from pydantic import BaseModel
from typing import Literal, Union, Annotated
from pydantic import Field
# Card payment variant; selected when payment_type == "credit_card".
class CreditCard(BaseModel):
    payment_type: Literal["credit_card"]  # discriminator tag
    card_number: str
    expiry: str
    cvv: str
# Bank transfer payment variant; selected when payment_type == "bank_transfer".
class BankTransfer(BaseModel):
    payment_type: Literal["bank_transfer"]  # discriminator tag
    iban: str
    bic: str
# PayPal payment variant; selected when payment_type == "paypal".
class PayPal(BaseModel):
    payment_type: Literal["paypal"]  # discriminator tag
    email: str
PaymentMethod = Annotated[
Union[CreditCard, BankTransfer, PayPal],
Field(discriminator="payment_type"),
]
# An order whose payment field holds one of the PaymentMethod variants.
class Order(BaseModel):
    id: int
    amount: float
    payment: PaymentMethod  # concrete model chosen via the payment_type discriminator
order = Order.model_validate({
"id": 99,
"amount": 49.99,
"payment": {"payment_type": "paypal", "email": "buyer@example.com"},
})
print(type(order.payment).__name__) # PayPal
print(order.payment.email)
Real-world: API request and response models
A complete pattern for a FastAPI-style endpoint - even if you are not using FastAPI, defining explicit request/response shapes makes your code self-documenting and easy to test:
from pydantic import BaseModel, field_validator, model_validator
from typing import Optional
from datetime import datetime
# --- Request models ---
# Request body for creating a post; title and tags are normalized on validation.
class CreatePostRequest(BaseModel):
    title: str
    body: str
    tags: list[str] = []
    published: bool = False

    @field_validator("title")
    @classmethod
    def title_not_empty(cls, v: str) -> str:
        """Return the title with surrounding whitespace removed; reject blanks."""
        v = v.strip()
        if not v:
            raise ValueError("title cannot be blank")
        return v

    @field_validator("tags")
    @classmethod
    def normalize_tags(cls, tags: list[str]) -> list[str]:
        """Return tags lowercased and stripped, dropping whitespace-only entries."""
        return [t.lower().strip() for t in tags if t.strip()]
# Request body for a partial update: every field is optional and defaults to None.
class UpdatePostRequest(BaseModel):
    title: Optional[str] = None
    body: Optional[str] = None
    tags: Optional[list[str]] = None
    published: Optional[bool] = None
# --- Response models ---
# Author summary embedded in post responses.
class AuthorResponse(BaseModel):
    id: int
    username: str
# Response shape for a single post, including its author and timestamps.
class PostResponse(BaseModel):
    id: int
    title: str
    body: str
    tags: list[str]
    published: bool
    author: AuthorResponse  # nested model
    created_at: datetime
    updated_at: datetime
# --- Simulate a handler ---
def create_post(request_body: dict, author_id: int) -> dict:
    """Validate a raw request body and return the created post as a JSON-ready dict.

    Simulated handler: the post id and the author's username are hard-coded
    placeholders; nothing is persisted.

    Args:
        request_body: Raw payload, validated against CreatePostRequest.
        author_id: Id placed in the nested author object of the response.

    Returns:
        The PostResponse dumped with mode="json".

    Raises:
        pydantic.ValidationError: If request_body fails validation.
    """
    req = CreatePostRequest.model_validate(request_body)
    # Capture the timestamp once so created_at and updated_at are identical
    # for a freshly created post (two separate now() calls would differ).
    now = datetime.now()
    post = PostResponse(
        id=1,
        title=req.title,
        body=req.body,
        tags=req.tags,
        published=req.published,
        author=AuthorResponse(id=author_id, username="alice"),
        created_at=now,
        updated_at=now,
    )
    return post.model_dump(mode="json")
result = create_post(
{"title": " Hello World ", "body": "Content here.", "tags": ["Python", "API"]},
author_id=7,
)
print(result["title"]) # Hello World
print(result["tags"]) # ['python', 'api']
Run the examples
bunpy validate.py
Pydantic v2's validation core is written in Rust and is dramatically faster than v1. Use it as the canonical source of truth for your data shapes - validators, serialization, and schema generation all live in one place.