ограничение векторного поиска, смена llm и проверка объёма ОЗУ

This commit is contained in:
2026-02-07 18:49:20 +03:00
parent 8ad70cdb7c
commit 5b7ea9276b
12 changed files with 88 additions and 33 deletions
+8 -17
View File
@@ -11,20 +11,10 @@ services:
- ./data/db:/var/lib/postgresql/data
networks:
- proxy
ports: # !только локальный тест!
- 5432:5432
#ports: # !только локальный тест!
# - 5432:5432
env_file:
- ./.env
command:
- "postgres"
- "-c"
- "wal_level=logical"
- "-c"
- "max_replication_slots=10"
- "-c"
- "max_wal_senders=10"
- "-c"
- "listen_addresses=*"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
interval: 10s
@@ -43,19 +33,20 @@ services:
- ./data/llm:/root/.ollama
networks:
- proxy
ports: # !только локальный тест!
- 11434:11434
#ports: # !только локальный тест!
# - 11434:11434
env_file:
- ./.env
healthcheck:
test: ["CMD", "ollama", "list"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 5
deploy:
resources:
limits:
memory: 5g
memory: 6g
api:
build: .
@@ -68,8 +59,8 @@ services:
max-file: "3"
networks:
- proxy
ports: # !только локальный тест!
- 8000:8000
#ports: # !только локальный тест!
# - 8000:8000
env_file:
- ./.env
volumes:
+1 -1
View File
@@ -7,7 +7,7 @@ POSTGRES_DB=lib
# Ollama
OLLAMA_URL="http://llm:11434"
OLLAMA_MAX_LOADED_MODELS=1
OLLAMA_MAX_LOADED_MODELS=2
OLLAMA_NUM_THREADS=4
OLLAMA_KEEP_ALIVE=5m
+1 -1
View File
@@ -7,7 +7,7 @@ POSTGRES_DB=lib
# Ollama
OLLAMA_URL="http://localhost:11434"
OLLAMA_MAX_LOADED_MODELS=1
OLLAMA_MAX_LOADED_MODELS=2
OLLAMA_NUM_THREADS=4
OLLAMA_KEEP_ALIVE=5m
+5 -1
View File
@@ -28,12 +28,14 @@ from .core import (
decode_token,
authenticate_user,
get_current_user,
get_optional_user,
get_current_active_user,
get_user_from_partial_token,
require_role,
require_any_role,
is_user_staff,
is_user_admin,
OptionalAuth,
RequireAuth,
RequireAdmin,
RequireMember,
@@ -91,10 +93,12 @@ __all__ = [
"authenticate_user",
"get_current_user",
"get_current_active_user",
"get_optional_user",
"require_role",
"require_any_role",
"is_user_staff",
"is_user_admin",
"OptionalAuth",
"RequireAuth",
"RequireAdmin",
"RequireMember",
@@ -107,7 +111,7 @@ __all__ = [
"get_provisioning_uri",
"verify_totp_code",
"qr_to_bitmap_b64",
"generate_totp_setup," "generate_codes_for_user",
# Each exported name must be its own string: a single fused
# "a,b" entry names no real attribute, so `from ... import *` would
# raise AttributeError and neither function would be exported.
"generate_totp_setup",
"generate_codes_for_user",
"verify_and_use_code",
"get_codes_status",
"CODES_COUNT",
+19 -1
View File
@@ -46,7 +46,7 @@ SECRET_KEY = os.getenv("SECRET_KEY")
logger = get_logger()
# OAuth2 схема
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token", auto_error=False)
class KeyDeriver:
@@ -217,6 +217,23 @@ def authenticate_user(session: Session, username: str, password: str) -> User |
return user
def get_optional_user(
    token: Annotated[str | None, Depends(oauth2_scheme)],
    session: Session = Depends(get_session),
) -> User | None:
    """Return the current user, or None when the request is not authenticated.

    None is returned when no token is supplied, when decoding the token
    raises HTTPException, when no matching user row exists, or when the
    user is not active.
    """
    if not token:
        return None

    try:
        payload = decode_token(token)
        candidate = session.get(User, payload.user_id)
        usable = bool(candidate and candidate.is_active)
    except HTTPException:
        # Optional auth: a rejected token is treated like a missing one.
        return None

    return candidate if usable else None
def get_current_user(
token: Annotated[str, Depends(oauth2_scheme)],
session: Session = Depends(get_session),
@@ -299,6 +316,7 @@ def require_any_role(allowed_roles: list[str]):
# Создание dependencies
OptionalAuth = Annotated[User | None, Depends(get_optional_user)]
RequireAuth = Annotated[User, Depends(get_current_active_user)]
RequireAdmin = Annotated[User, Depends(require_role("admin"))]
RequireMember = Annotated[User, Depends(require_role("member"))]
+9 -3
View File
@@ -1,6 +1,6 @@
"""Основной модуль"""
import asyncio, sys, traceback
import asyncio, psutil, sys, traceback
from contextlib import asynccontextmanager
from datetime import datetime
from pathlib import Path
@@ -56,12 +56,18 @@ async def lifespan(_):
logger.error(f"[-] Seeding failed: {e}")
logger.info("[+] Loading ollama models...")
ollama_client = Client(host=OLLAMA_URL)
try:
ollama_client = Client(host=OLLAMA_URL)
ollama_client.pull("mxbai-embed-large")
ollama_client.pull("llama3.2")
total_memory_bytes = psutil.virtual_memory().total
total_memory_gb = total_memory_bytes / (1024 ** 3)
if total_memory_gb > 5:
ollama_client.pull("qwen3:4b")
except ResponseError as e:
logger.error(f"[-] Failed to pull models {e}")
asyncio.create_task(cleanup_task())
logger.info("[+] Starting application...")
yield # Обработка запросов
+11 -6
View File
@@ -13,7 +13,7 @@ from sqlalchemy import text, case, distinct
from sqlalchemy.orm import selectinload, defer
from sqlmodel import Session, select, col, func
from library_service.auth import RequireStaff
from library_service.auth import RequireStaff, OptionalAuth
from library_service.services import transcode_image
from library_service.settings import get_session, OLLAMA_URL, BOOKS_PREVIEW_DIR
from library_service.models.enums import BookStatus
@@ -62,6 +62,7 @@ from sqlalchemy.orm import selectinload
@router.get("/filter", response_model=BookFilteredList)
def filter_books(
current_user: OptionalAuth,
session: Session = Depends(get_session),
q: str | None = Query(None, max_length=50, description="Поиск"),
min_page_count: int | None = Query(None, ge=0),
@@ -100,12 +101,16 @@ def filter_books(
total = session.scalar(count_statement)
if q:
emb = ollama_client.embeddings(model="mxbai-embed-large", prompt=q)["embedding"]
distance_col = Book.embedding.cosine_distance(emb) # ty: ignore
statement = statement.where(Book.embedding.is_not(None)) # ty: ignore
if current_user:
emb = ollama_client.embeddings(model="mxbai-embed-large", prompt=q)["embedding"]
distance_col = Book.embedding.cosine_distance(emb) # ty: ignore
statement = statement.where(Book.embedding.is_not(None)) # ty: ignore
keyword_match = case((Book.title.ilike(f"%{q}%"), 0), else_=1) # ty: ignore
statement = statement.order_by(keyword_match, distance_col)
keyword_match = case((Book.title.ilike(f"%{q}%"), 0), else_=1) # ty: ignore
statement = statement.order_by(keyword_match, distance_col)
else:
statement = statement.where(Book.title.ilike(f"%{q}%")) # ty: ignore
statement = statement.order_by(Book.id) # ty: ignore
else:
statement = statement.order_by(Book.id) # ty: ignore
+1 -1
View File
@@ -731,7 +731,7 @@ $(document).ready(() => {
}
try {
const data = await Api.get("/api/users?skip=0&limit=500");
const data = await Api.get("/api/users/?skip=0&limit=500");
cachedUsers = data.users;
renderUsersList(cachedUsers);
} catch (error) {
+1 -1
View File
@@ -504,7 +504,7 @@ $(() => {
$(SELECTORS.adminActions).removeClass("hidden");
}
Promise.all([Api.get("/api/authors"), Api.get("/api/genres")])
Promise.all([Api.get("/api/authors/"), Api.get("/api/genres/")])
.then(([authorsData, genresData]) => {
initAuthors(authorsData.authors || []);
initGenres(genresData.genres || []);
+1 -1
View File
@@ -21,7 +21,7 @@ $(document).ready(() => {
showLoadingState();
Promise.all([
Api.get("/api/users?skip=0&limit=100"),
Api.get("/api/users/?skip=0&limit=100"),
Api.get("/api/users/roles"),
])
.then(([usersData, rolesData]) => {
+1
View File
@@ -25,6 +25,7 @@ dependencies = [
"limits>=5.6.0",
"ollama>=0.6.1",
"pgvector>=0.4.2",
"psutil>=7.2.2",
]
[dependency-groups]
Generated
+30
View File
@@ -639,6 +639,7 @@ dependencies = [
{ name = "ollama" },
{ name = "passlib", extra = ["argon2"] },
{ name = "pgvector" },
{ name = "psutil" },
{ name = "psycopg2-binary" },
{ name = "pydantic", extra = ["email"] },
{ name = "pyotp" },
@@ -671,6 +672,7 @@ requires-dist = [
{ name = "ollama", specifier = ">=0.6.1" },
{ name = "passlib", extras = ["argon2"], specifier = ">=1.7.4" },
{ name = "pgvector", specifier = ">=0.4.2" },
{ name = "psutil", specifier = ">=7.2.2" },
{ name = "psycopg2-binary", specifier = ">=2.9.11" },
{ name = "pydantic", extras = ["email"], specifier = ">=2.12.5" },
{ name = "pyotp", specifier = ">=2.9.0" },
@@ -1078,6 +1080,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "psutil"
version = "7.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" },
{ url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" },
{ url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" },
{ url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" },
{ url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" },
{ url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" },
{ url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" },
{ url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" },
{ url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" },
{ url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" },
{ url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" },
{ url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" },
{ url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
{ url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
{ url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
{ url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
{ url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
{ url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
{ url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
{ url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
]
[[package]]
name = "psycopg2-binary"
version = "2.9.11"