I am trying to use sqlmodel to insert records in the database where the data is as shown below. A house object with a color and many positions. Locations will also be associated with many houses. The input is:
[ { "color": "red", "locations": [ {"type": "country", "name": "netherlands"}, {"type": "municipality", "name": "amsterdam"}, ], }, { "color": "green", "locations": [ {"type": "country", "name": "netherlands"}, {"type": "municipality", "name": "amsterdam"}, ], }, ]
Here is a reproducible example of what I'm trying to do:
import asyncio from typing import list from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.orm import sessionmaker from sqlmodel import field, relationship, sqlmodel, uniqueconstraint from sqlmodel.ext.asyncio.session import asyncsession database_url = "sqlite+aiosqlite:///./database.db" engine = create_async_engine(database_url, echo=true, future=true) async def init_db() -> none: async with engine.begin() as conn: await conn.run_sync(sqlmodel.metadata.create_all) sessionlocal = sessionmaker( autocommit=false, autoflush=false, bind=engine, class_=asyncsession, expire_on_commit=false, ) class houselocationlink(sqlmodel, table=true): house_id: int = field(foreign_key="house.id", nullable=false, primary_key=true) location_id: int = field( foreign_key="location.id", nullable=false, primary_key=true ) class location(sqlmodel, table=true): id: int = field(primary_key=true) type: str # country, county, municipality, district, city, area, street, etc name: str # amsterdam, germany, my street, etc houses: list["house"] = relationship( back_populates="locations", link_model=houselocationlink, ) __table_args__ = (uniqueconstraint("type", "name"),) class house(sqlmodel, table=true): id: int = field(primary_key=true) color: str = field() locations: list["location"] = relationship( back_populates="houses", link_model=houselocationlink, ) # other fields... data = [ { "color": "red", "locations": [ {"type": "country", "name": "netherlands"}, {"type": "municipality", "name": "amsterdam"}, ], }, { "color": "green", "locations": [ {"type": "country", "name": "netherlands"}, {"type": "municipality", "name": "amsterdam"}, ], }, ] async def add_houses(payload) -> list[house]: result = [] async with sessionlocal() as session: for item in payload: locations = [] for location in item["locations"]: locations.append(location(**location)) house = house(color=item["color"], locations=locations) result.append(house) session.add_all(result) await session.commit() asyncio.run(init_db()) asyncio.run(add_houses(data))
The problem is that when I run this code it tries to insert a duplicate location object along with the house object.
I would like to be able to use relationship
here as it makes accessing house.locations
very easy.
However, I can't figure out how to stop it from trying to insert duplicate positions. Ideally, I would have a mapper function that does the get_or_create
position.
The best I've seen that does this is sqlalchemy's associated proxies. But it looks like sqlmodel doesn't support this.
Does anyone know how to achieve this? If you know how to accomplish this using sqlalchemy instead of sqlmodel, I'd be interested in seeing your solution. I haven't started this project yet, so I might as well use sqlalchemy if it makes my life easier.
I also tried to adjust using sa_relationship_kwargs
like
sa_relationship_kwargs={ "lazy": "selectin", "cascade": "none", "viewonly": "true", }
But this will prevent the associated entry from being added to the houselocationlink
table.
Any pointers would be greatly appreciated. Even if it means completely changing my approach.
Thanks!
I am writing this solution because you mentioned that you would like to use sqlalchemy
. As you mentioned, you need the associated proxy, but you also need the "unique object". I've adjusted this to function async query (rather than sync), consistent with my personal preference, all without significantly changing the logic.
import asyncio from sqlalchemy import UniqueConstraint, ForeignKey, select, text, func from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped, relationship from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.ext.associationproxy import AssociationProxy, association_proxy class Base(DeclarativeBase): pass class UniqueMixin: cache = {} @classmethod async def as_unique(cls, session: AsyncSession, *args, **kwargs): key = cls, cls.unique_hash(*args, **kwargs) if key in cls.cache: return cls.cache[key] with session.no_autoflush: statement = select(cls).where(cls.unique_filter(*args, **kwargs)).limit(1) obj = (await session.scalars(statement)).first() if obj is None: obj = cls(*args, **kwargs) session.add(obj) cls.cache[key] = obj return obj @classmethod def unique_hash(cls, *args, **kwargs): raise NotImplementedError("Implement this in subclass") @classmethod def unique_filter(cls, *args, **kwargs): raise NotImplementedError("Implement this in subclass") class Location(UniqueMixin, Base): __tablename__ = "location" id: Mapped[int] = mapped_column(primary_key=True) name: Mapped[str] = mapped_column() type: Mapped[str] = mapped_column() house_associations: Mapped[list["HouseLocationLink"]] = relationship(back_populates="location") __table_args = (UniqueConstraint(type, name),) @classmethod def unique_hash(cls, name, type): # this is the key for the dict return type, name @classmethod def unique_filter(cls, name, type): # this is how you want to establish the uniqueness # the result of this filter will be the value in the dict return (cls.type == type) & (cls.name == name) class House(Base): __tablename__ = "house" id: Mapped[int] = mapped_column(primary_key=True) name: Mapped[str] = mapped_column() location_associations: Mapped[list["HouseLocationLink"]] = relationship(back_populates="house") locations: AssociationProxy[list[Location]] = association_proxy( "location_associations", "location", # you need this so you can directly add ``Location`` objects to ``House`` creator=lambda location: HouseLocationLink(location=location), ) class HouseLocationLink(Base): __tablename__ = "houselocationlink" house_id: Mapped[int] = mapped_column(ForeignKey(House.id), primary_key=True) location_id: Mapped[int] = mapped_column(ForeignKey(Location.id), primary_key=True) location: Mapped[Location] = relationship(back_populates="house_associations") house: Mapped[House] = relationship(back_populates="location_associations") engine = create_async_engine("sqlite+aiosqlite:///test.sqlite") async def main(): data = [ { "name": "red", "locations": [ {"type": "country", "name": "Netherlands"}, {"type": "municipality", "name": "Amsterdam"}, ], }, { "name": "green", "locations": [ {"type": "country", "name": "Netherlands"}, {"type": "municipality", "name": "Amsterdam"}, ], }, ] async with engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) async with AsyncSession(engine) as session, session.begin(): for item in data: house = House( name=item["name"], locations=[await Location.as_unique(session, **location) for location in item["locations"]] ) session.add(house) async with AsyncSession(engine) as session: statement = select(func.count(text("*")), Location) assert await session.scalar(statement) == 2 statement = select(func.count(text("*")), House) assert await session.scalar(statement) == 2 statement = select(func.count(text("*")), HouseLocationLink) assert await session.scalar(statement) == 4 asyncio.run(main())
You can notice that the assertion does pass, no unique constraints are violated, and there are no multiple insertions. I've left some inline comments mentioning the "critical" aspects of this code. If you run this code multiple times, you will notice that only the new house
object and the corresponding houselocationlink
are added, but not the new location
object. Only one query is made per key-value pair to cache this behavior.
The above is the detailed content of Using SQLModel to insert a many-to-many relationship object when one side of the relationship already exists in the database. For more information, please follow other related articles on the PHP Chinese website!