-
Notifications
You must be signed in to change notification settings - Fork 85
Description
Describe the bug or question
If a pydantic select schema is provided to get_multi and includes a relationship field to some other select schema, get_multi will execute a cartesian product between the tables (selecting both) resulting in a boolean value.
To Reproduce
from collections.abc import AsyncGenerator
import asyncio
from contextlib import asynccontextmanager
from typing import Optional
from fastcrud import FastCRUD
from sqlalchemy import (
Column,
Integer,
String,
ForeignKey,
Boolean,
DateTime
)
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker, DeclarativeBase, relationship
from pydantic import BaseModel, ConfigDict
from fastcrud.crud.fast_crud import FastCRUD
class Base(DeclarativeBase):
pass
class ModelTest(Base):
__tablename__ = "test"
id = Column(Integer, primary_key=True)
name = Column(String(32))
tier_id = Column(Integer, ForeignKey("tier.id"))
tier = relationship("TierModel", back_populates="tests")
is_deleted = Column(Boolean, default=False)
deleted_at = Column(DateTime, nullable=True, default=None)
class TierModel(Base):
__tablename__ = "tier"
id = Column(Integer, primary_key=True)
name = Column(String(32), unique=True)
tests = relationship("ModelTest", back_populates="tier")
class TierReadSchema(BaseModel):
name: str
class TestReadSchema(BaseModel):
model_config = ConfigDict(extra="forbid")
name: str
tier_id: int
tier: Optional[TierReadSchema] = None
@asynccontextmanager
async def _async_session(url: str) -> AsyncGenerator[AsyncSession]:
async_engine = create_async_engine(url, echo=True, future=True)
session = sessionmaker(async_engine, class_=AsyncSession, expire_on_commit=False)
async with session() as s:
async with async_engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
yield s
async with async_engine.begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await async_engine.dispose()
def test_data() -> list[dict]:
return [
{"id": 1, "name": "Charlie", "tier_id": 1},
{"id": 2, "name": "Alice", "tier_id": 2},
{"id": 3, "name": "Bob", "tier_id": 1},
{"id": 4, "name": "David", "tier_id": 2},
{"id": 5, "name": "Eve", "tier_id": 1},
{"id": 6, "name": "Frank", "tier_id": 2},
{"id": 7, "name": "Grace", "tier_id": 1},
{"id": 8, "name": "Hannah", "tier_id": 2},
{"id": 9, "name": "Ivan", "tier_id": 1},
{"id": 10, "name": "Judy", "tier_id": 2},
{"id": 11, "name": "Alice", "tier_id": 1},
]
def test_data_tier() -> list[dict]:
return [{"id": 1, "name": "Premium"}, {"id": 2, "name": "Basic"}]
async def test(async_session, test_data, test_data_tier):
for tier_item in test_data_tier:
async_session.add(TierModel(**tier_item))
await async_session.commit()
for user_item in test_data:
async_session.add(ModelTest(**user_item))
await async_session.commit()
crud = FastCRUD(ModelTest)
result = await crud.get_joined(
db=async_session,
join_model=TierModel,
join_prefix="tier_",
schema_to_select=TestReadSchema,
join_schema_to_select=TierReadSchema,
nest_joins=True
)
print(result)
result = await crud.get_multi(
db=async_session,
schema_to_select=TestReadSchema
)
print(result)
async def main():
async with _async_session(url="sqlite+aiosqlite:///:memory:") as session:
await test(session, test_data(), test_data_tier())
if __name__ == "__main__":
asyncio.run(main())
Description
This results in the following log:
2025-02-09 16:34:58,858 INFO sqlalchemy.engine.Engine SELECT test.name, test.tier_id, tier.id = test.tier_id AS anon_1, tier.name AS joined__tier_name
FROM test LEFT OUTER JOIN tier ON tier.id = test.tier_id
2025-02-09 16:34:58,858 INFO sqlalchemy.engine.Engine [generated in 0.00010s] ()
{'name': 'Charlie', 'tier_id': 1, 'tier': {'name': 'Premium'}}
sys:1: SAWarning: SELECT statement has a cartesian product between FROM element(s) "tier" and FROM element "test". Apply join condition(s) between each element to resolve.
2025-02-09 16:34:58,859 INFO sqlalchemy.engine.Engine SELECT test.name, test.tier_id, tier.id = test.tier_id AS anon_1
FROM test, tier
LIMIT ? OFFSET ?
2025-02-09 16:34:58,859 INFO sqlalchemy.engine.Engine [generated in 0.00009s] (100, 0)
2025-02-09 16:34:58,861 INFO sqlalchemy.engine.Engine SELECT count(*) AS count_1
FROM test
2025-02-09 16:34:58,861 INFO sqlalchemy.engine.Engine [generated in 0.00008s] ()
{'data': [{'name': 'Charlie', 'tier_id': 1, 'tier': True}, {'name': 'Charlie', 'tier_id': 1, 'tier': False}, {'name': 'Alice', 'tier_id': 2, 'tier': False}, {'name': 'Alice', 'tier_id': 2, 'tier': True}, {'name': 'Bob', 'tier_id': 1, 'tier': True}, {'name': 'Bob', 'tier_id': 1, 'tier': False}, {'name': 'David', 'tier_id': 2, 'tier': False}, {'name': 'David', 'tier_id': 2, 'tier': True}, {'name': 'Eve', 'tier_id': 1, 'tier': True}, {'name': 'Eve', 'tier_id': 1, 'tier': False}, {'name': 'Frank', 'tier_id': 2, 'tier': False}, {'name': 'Frank', 'tier_id': 2, 'tier': True}, {'name': 'Grace', 'tier_id': 1, 'tier': True}, {'name': 'Grace', 'tier_id': 1, 'tier': False}, {'name': 'Hannah', 'tier_id': 2, 'tier': False}, {'name': 'Hannah', 'tier_id': 2, 'tier': True}, {'name': 'Ivan', 'tier_id': 1, 'tier': True}, {'name': 'Ivan', 'tier_id': 1, 'tier': False}, {'name': 'Judy', 'tier_id': 2, 'tier': False}, {'name': 'Judy', 'tier_id': 2, 'tier': True}, {'name': 'Alice', 'tier_id': 1, 'tier': True}, {'name': 'Alice', 'tier_id': 1, 'tier': False}], 'total_count': 11}
This becomes a problem for example for the read_multi endpoint if I provide a select_schema to the EndpointCreator, because FastAPI will validate against the pydantic select model (the pydantic validation also fails if return_as_model=True). Generally, I don't think relationship fields should be automatically included in the select statement as they should be explicitly handled by a join statement instead. But automatically joining all relationships is of course not feasible, so these relationship fields should just be skipped if using the "_extract_matching_columns_from_schema"-function. This can be easily achieved by just changing https://github.com/igorbenav/fastcrud/blob/82b6b17ec6bdae0ef6069f7ad7fdaf7518d6714b/fastcrud/crud/helper.py#L80 to
if hasattr(model_or_alias, field) and field not in mapper.relationships:
resulting in the "correct" output:
2025-02-09 16:45:12,381 INFO sqlalchemy.engine.Engine SELECT test.name, test.tier_id, tier.name AS joined__tier_name
FROM test LEFT OUTER JOIN tier ON tier.id = test.tier_id
2025-02-09 16:45:12,381 INFO sqlalchemy.engine.Engine [generated in 0.00008s] ()
{'name': 'Charlie', 'tier_id': 1, 'tier': {'name': 'Premium'}}
2025-02-09 16:45:12,382 INFO sqlalchemy.engine.Engine SELECT test.name, test.tier_id
FROM test
LIMIT ? OFFSET ?
2025-02-09 16:45:12,382 INFO sqlalchemy.engine.Engine [generated in 0.00008s] (100, 0)
2025-02-09 16:45:12,383 INFO sqlalchemy.engine.Engine SELECT count(*) AS count_1
FROM test
2025-02-09 16:45:12,384 INFO sqlalchemy.engine.Engine [generated in 0.00007s] ()
{'data': [{'name': 'Charlie', 'tier_id': 1}, {'name': 'Alice', 'tier_id': 2}, {'name': 'Bob', 'tier_id': 1}, {'name': 'David', 'tier_id': 2}, {'name': 'Eve', 'tier_id': 1}, {'name': 'Frank', 'tier_id': 2}, {'name': 'Grace', 'tier_id': 1}, {'name': 'Hannah', 'tier_id': 2}, {'name': 'Ivan', 'tier_id': 1}, {'name': 'Judy', 'tier_id': 2}, {'name': 'Alice', 'tier_id': 1}], 'total_count': 11}
If nothing speaks against implementing this minor change, I will just create a PR for it.