ограничение векторного поиска, смена llm и проверка объёма ОЗУ

This commit is contained in:
2026-02-07 18:49:20 +03:00
parent 8ad70cdb7c
commit 5b7ea9276b
12 changed files with 88 additions and 33 deletions
+8 -17
View File
@@ -11,20 +11,10 @@ services:
- ./data/db:/var/lib/postgresql/data - ./data/db:/var/lib/postgresql/data
networks: networks:
- proxy - proxy
ports: # !только локальный тест! #ports: # !только локальный тест!
- 5432:5432 # - 5432:5432
env_file: env_file:
- ./.env - ./.env
command:
- "postgres"
- "-c"
- "wal_level=logical"
- "-c"
- "max_replication_slots=10"
- "-c"
- "max_wal_senders=10"
- "-c"
- "listen_addresses=*"
healthcheck: healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
interval: 10s interval: 10s
@@ -43,19 +33,20 @@ services:
- ./data/llm:/root/.ollama - ./data/llm:/root/.ollama
networks: networks:
- proxy - proxy
ports: # !только локальный тест! #ports: # !только локальный тест!
- 11434:11434 # - 11434:11434
env_file: env_file:
- ./.env - ./.env
healthcheck: healthcheck:
test: ["CMD", "ollama", "list"] test: ["CMD", "ollama", "list"]
start_period: 10s
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 5 retries: 5
deploy: deploy:
resources: resources:
limits: limits:
memory: 5g memory: 6g
api: api:
build: . build: .
@@ -68,8 +59,8 @@ services:
max-file: "3" max-file: "3"
networks: networks:
- proxy - proxy
ports: # !только локальный тест! #ports: # !только локальный тест!
- 8000:8000 # - 8000:8000
env_file: env_file:
- ./.env - ./.env
volumes: volumes:
+1 -1
View File
@@ -7,7 +7,7 @@ POSTGRES_DB=lib
# Ollama # Ollama
OLLAMA_URL="http://llm:11434" OLLAMA_URL="http://llm:11434"
OLLAMA_MAX_LOADED_MODELS=1 OLLAMA_MAX_LOADED_MODELS=2
OLLAMA_NUM_THREADS=4 OLLAMA_NUM_THREADS=4
OLLAMA_KEEP_ALIVE=5m OLLAMA_KEEP_ALIVE=5m
+1 -1
View File
@@ -7,7 +7,7 @@ POSTGRES_DB=lib
# Ollama # Ollama
OLLAMA_URL="http://localhost:11434" OLLAMA_URL="http://localhost:11434"
OLLAMA_MAX_LOADED_MODELS=1 OLLAMA_MAX_LOADED_MODELS=2
OLLAMA_NUM_THREADS=4 OLLAMA_NUM_THREADS=4
OLLAMA_KEEP_ALIVE=5m OLLAMA_KEEP_ALIVE=5m
+5 -1
View File
@@ -28,12 +28,14 @@ from .core import (
decode_token, decode_token,
authenticate_user, authenticate_user,
get_current_user, get_current_user,
get_optional_user,
get_current_active_user, get_current_active_user,
get_user_from_partial_token, get_user_from_partial_token,
require_role, require_role,
require_any_role, require_any_role,
is_user_staff, is_user_staff,
is_user_admin, is_user_admin,
OptionalAuth,
RequireAuth, RequireAuth,
RequireAdmin, RequireAdmin,
RequireMember, RequireMember,
@@ -91,10 +93,12 @@ __all__ = [
"authenticate_user", "authenticate_user",
"get_current_user", "get_current_user",
"get_current_active_user", "get_current_active_user",
"get_optional_user",
"require_role", "require_role",
"require_any_role", "require_any_role",
"is_user_staff", "is_user_staff",
"is_user_admin", "is_user_admin",
"OptionalAuth",
"RequireAuth", "RequireAuth",
"RequireAdmin", "RequireAdmin",
"RequireMember", "RequireMember",
@@ -107,7 +111,7 @@ __all__ = [
"get_provisioning_uri", "get_provisioning_uri",
"verify_totp_code", "verify_totp_code",
"qr_to_bitmap_b64", "qr_to_bitmap_b64",
"generate_totp_setup," "generate_codes_for_user", "generate_totp_setup,generate_codes_for_user",
"verify_and_use_code", "verify_and_use_code",
"get_codes_status", "get_codes_status",
"CODES_COUNT", "CODES_COUNT",
+19 -1
View File
@@ -46,7 +46,7 @@ SECRET_KEY = os.getenv("SECRET_KEY")
logger = get_logger() logger = get_logger()
# OAuth2 схема # OAuth2 схема
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token") oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token", auto_error=False)
class KeyDeriver: class KeyDeriver:
@@ -217,6 +217,23 @@ def authenticate_user(session: Session, username: str, password: str) -> User |
return user return user
def get_optional_user(
    token: Annotated[str | None, Depends(oauth2_scheme)],
    session: Session = Depends(get_session),
) -> User | None:
    """Return the user behind the bearer token, or None for anonymous callers.

    Intended for endpoints that work both with and without authentication:
    instead of rejecting the request, every failure mode collapses to None.

    Args:
        token: Bearer token supplied by ``oauth2_scheme``; may be None or
            empty when the request carries no credentials.
        session: Database session used to load the user record.

    Returns:
        The ``User`` loaded by the id stored in the token when that user
        exists and ``is_active`` is truthy; otherwise None.
    """
    resolved: User | None = None
    if token:
        try:
            payload = decode_token(token)
            candidate = session.get(User, payload.user_id)
            # Unknown ids and deactivated accounts are treated as anonymous.
            if candidate and candidate.is_active:
                resolved = candidate
        except HTTPException:
            # Presumably raised by decode_token for a bad or expired token
            # (verify against its implementation); optional auth must not
            # turn that into an error response.
            resolved = None
    return resolved
def get_current_user( def get_current_user(
token: Annotated[str, Depends(oauth2_scheme)], token: Annotated[str, Depends(oauth2_scheme)],
session: Session = Depends(get_session), session: Session = Depends(get_session),
@@ -299,6 +316,7 @@ def require_any_role(allowed_roles: list[str]):
# Создание dependencies # Создание dependencies
OptionalAuth = Annotated[User | None, Depends(get_optional_user)]
RequireAuth = Annotated[User, Depends(get_current_active_user)] RequireAuth = Annotated[User, Depends(get_current_active_user)]
RequireAdmin = Annotated[User, Depends(require_role("admin"))] RequireAdmin = Annotated[User, Depends(require_role("admin"))]
RequireMember = Annotated[User, Depends(require_role("member"))] RequireMember = Annotated[User, Depends(require_role("member"))]
+9 -3
View File
@@ -1,6 +1,6 @@
"""Основной модуль""" """Основной модуль"""
import asyncio, sys, traceback import asyncio, psutil, sys, traceback
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@@ -56,12 +56,18 @@ async def lifespan(_):
logger.error(f"[-] Seeding failed: {e}") logger.error(f"[-] Seeding failed: {e}")
logger.info("[+] Loading ollama models...") logger.info("[+] Loading ollama models...")
ollama_client = Client(host=OLLAMA_URL)
try: try:
ollama_client = Client(host=OLLAMA_URL)
ollama_client.pull("mxbai-embed-large") ollama_client.pull("mxbai-embed-large")
ollama_client.pull("llama3.2")
total_memory_bytes = psutil.virtual_memory().total
total_memory_gb = total_memory_bytes / (1024 ** 3)
if total_memory_gb > 5:
ollama_client.pull("qwen3:4b")
except ResponseError as e: except ResponseError as e:
logger.error(f"[-] Failed to pull models {e}") logger.error(f"[-] Failed to pull models {e}")
asyncio.create_task(cleanup_task()) asyncio.create_task(cleanup_task())
logger.info("[+] Starting application...") logger.info("[+] Starting application...")
yield # Обработка запросов yield # Обработка запросов
+11 -6
View File
@@ -13,7 +13,7 @@ from sqlalchemy import text, case, distinct
from sqlalchemy.orm import selectinload, defer from sqlalchemy.orm import selectinload, defer
from sqlmodel import Session, select, col, func from sqlmodel import Session, select, col, func
from library_service.auth import RequireStaff from library_service.auth import RequireStaff, OptionalAuth
from library_service.services import transcode_image from library_service.services import transcode_image
from library_service.settings import get_session, OLLAMA_URL, BOOKS_PREVIEW_DIR from library_service.settings import get_session, OLLAMA_URL, BOOKS_PREVIEW_DIR
from library_service.models.enums import BookStatus from library_service.models.enums import BookStatus
@@ -62,6 +62,7 @@ from sqlalchemy.orm import selectinload
@router.get("/filter", response_model=BookFilteredList) @router.get("/filter", response_model=BookFilteredList)
def filter_books( def filter_books(
current_user: OptionalAuth,
session: Session = Depends(get_session), session: Session = Depends(get_session),
q: str | None = Query(None, max_length=50, description="Поиск"), q: str | None = Query(None, max_length=50, description="Поиск"),
min_page_count: int | None = Query(None, ge=0), min_page_count: int | None = Query(None, ge=0),
@@ -100,12 +101,16 @@ def filter_books(
total = session.scalar(count_statement) total = session.scalar(count_statement)
if q: if q:
emb = ollama_client.embeddings(model="mxbai-embed-large", prompt=q)["embedding"] if current_user:
distance_col = Book.embedding.cosine_distance(emb) # ty: ignore emb = ollama_client.embeddings(model="mxbai-embed-large", prompt=q)["embedding"]
statement = statement.where(Book.embedding.is_not(None)) # ty: ignore distance_col = Book.embedding.cosine_distance(emb) # ty: ignore
statement = statement.where(Book.embedding.is_not(None)) # ty: ignore
keyword_match = case((Book.title.ilike(f"%{q}%"), 0), else_=1) # ty: ignore keyword_match = case((Book.title.ilike(f"%{q}%"), 0), else_=1) # ty: ignore
statement = statement.order_by(keyword_match, distance_col) statement = statement.order_by(keyword_match, distance_col)
else:
statement = statement.where(Book.title.ilike(f"%{q}%")) # ty: ignore
statement = statement.order_by(Book.id) # ty: ignore
else: else:
statement = statement.order_by(Book.id) # ty: ignore statement = statement.order_by(Book.id) # ty: ignore
+1 -1
View File
@@ -731,7 +731,7 @@ $(document).ready(() => {
} }
try { try {
const data = await Api.get("/api/users?skip=0&limit=500"); const data = await Api.get("/api/users/?skip=0&limit=500");
cachedUsers = data.users; cachedUsers = data.users;
renderUsersList(cachedUsers); renderUsersList(cachedUsers);
} catch (error) { } catch (error) {
+1 -1
View File
@@ -504,7 +504,7 @@ $(() => {
$(SELECTORS.adminActions).removeClass("hidden"); $(SELECTORS.adminActions).removeClass("hidden");
} }
Promise.all([Api.get("/api/authors"), Api.get("/api/genres")]) Promise.all([Api.get("/api/authors/"), Api.get("/api/genres/")])
.then(([authorsData, genresData]) => { .then(([authorsData, genresData]) => {
initAuthors(authorsData.authors || []); initAuthors(authorsData.authors || []);
initGenres(genresData.genres || []); initGenres(genresData.genres || []);
+1 -1
View File
@@ -21,7 +21,7 @@ $(document).ready(() => {
showLoadingState(); showLoadingState();
Promise.all([ Promise.all([
Api.get("/api/users?skip=0&limit=100"), Api.get("/api/users/?skip=0&limit=100"),
Api.get("/api/users/roles"), Api.get("/api/users/roles"),
]) ])
.then(([usersData, rolesData]) => { .then(([usersData, rolesData]) => {
+1
View File
@@ -25,6 +25,7 @@ dependencies = [
"limits>=5.6.0", "limits>=5.6.0",
"ollama>=0.6.1", "ollama>=0.6.1",
"pgvector>=0.4.2", "pgvector>=0.4.2",
"psutil>=7.2.2",
] ]
[dependency-groups] [dependency-groups]
Generated
+30
View File
@@ -639,6 +639,7 @@ dependencies = [
{ name = "ollama" }, { name = "ollama" },
{ name = "passlib", extra = ["argon2"] }, { name = "passlib", extra = ["argon2"] },
{ name = "pgvector" }, { name = "pgvector" },
{ name = "psutil" },
{ name = "psycopg2-binary" }, { name = "psycopg2-binary" },
{ name = "pydantic", extra = ["email"] }, { name = "pydantic", extra = ["email"] },
{ name = "pyotp" }, { name = "pyotp" },
@@ -671,6 +672,7 @@ requires-dist = [
{ name = "ollama", specifier = ">=0.6.1" }, { name = "ollama", specifier = ">=0.6.1" },
{ name = "passlib", extras = ["argon2"], specifier = ">=1.7.4" }, { name = "passlib", extras = ["argon2"], specifier = ">=1.7.4" },
{ name = "pgvector", specifier = ">=0.4.2" }, { name = "pgvector", specifier = ">=0.4.2" },
{ name = "psutil", specifier = ">=7.2.2" },
{ name = "psycopg2-binary", specifier = ">=2.9.11" }, { name = "psycopg2-binary", specifier = ">=2.9.11" },
{ name = "pydantic", extras = ["email"], specifier = ">=2.12.5" }, { name = "pydantic", extras = ["email"], specifier = ">=2.12.5" },
{ name = "pyotp", specifier = ">=2.9.0" }, { name = "pyotp", specifier = ">=2.9.0" },
@@ -1078,6 +1080,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
] ]
[[package]]
name = "psutil"
version = "7.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" },
{ url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" },
{ url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" },
{ url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" },
{ url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" },
{ url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" },
{ url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" },
{ url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" },
{ url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" },
{ url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" },
{ url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" },
{ url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" },
{ url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
{ url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
{ url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
{ url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
{ url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
{ url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
{ url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
{ url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
]
[[package]] [[package]]
name = "psycopg2-binary" name = "psycopg2-binary"
version = "2.9.11" version = "2.9.11"