more AI optimizations, reworked logger

2025-10-09 12:35:15 +02:00
parent 7e07bdea0c
commit 3cc6e793d2
22 changed files with 186 additions and 320 deletions

View File

@@ -1,28 +1,13 @@
import sys
from datetime import datetime
import loguru
import regex
import requests
from bs4 import BeautifulSoup
from src import LOG_DIR
from src.logic import BookData as Book
from src.shared.logging import log
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
log.add(
f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
rotation="1 day",
retention="1 month",
)
class Catalogue:
def __init__(self, timeout=15):
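Note: the per-module loguru setup removed above is replaced by `from src.shared.logging import log`; comments added in the other modules of this commit say the sinks are configured once in main. Neither `src/shared/logging.py` nor main is shown in this diff, so the following is only a plausible sketch reconstructed from the sinks that were removed here (stdout at INFO, a size-rotated application.log, a dated daily file); the `configure()` helper name and the retention values are assumptions.

# src/shared/logging.py -- hypothetical sketch, not part of this commit
import sys
from datetime import datetime

import loguru

from src import LOG_DIR

# Modules do `from src.shared.logging import log` and never add sinks themselves.
log = loguru.logger


def configure(level: str = "INFO") -> None:
    """Called once from main; mirrors the sinks removed per module in this commit."""
    log.remove()  # drop loguru's default stderr sink
    log.add(sys.stdout, level=level)
    log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
    log.add(
        f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
        rotation="1 day",
        retention="1 month",
    )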

View File

@@ -3,7 +3,6 @@ import json
import os
import re
import sqlite3 as sql
import sys
import tempfile
from dataclasses import asdict
from pathlib import Path
@@ -13,7 +12,7 @@ from typing import Any, List, Optional, Tuple, Union
import loguru
from src import DATABASE_DIR, LOG_DIR, settings
from src import DATABASE_DIR, settings
from src.backend.db import (
CREATE_ELSA_FILES_TABLE,
CREATE_ELSA_MEDIA_TABLE,
@@ -34,9 +33,6 @@ from src.logic.semester import Semester
from src.utils.blob import create_blob
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
ascii_lowercase = lower + digits + punctuation
@@ -186,7 +182,13 @@ class Database:
Returns:
sql.Connection: The active connection to the database
"""
return sql.connect(self.db_path)
conn = sql.connect(self.db_path)
# Fast pragmas suitable for a desktop app DB
conn.execute("PRAGMA journal_mode=WAL;")
conn.execute("PRAGMA synchronous=NORMAL;")
conn.execute("PRAGMA temp_store=MEMORY;")
conn.execute("PRAGMA mmap_size=134217728;") # 128MB
return conn
def close_connection(self, conn: sql.Connection):
"""
@@ -214,6 +216,25 @@ class Database:
cursor.execute(CREATE_ELSA_TABLE)
cursor.execute(CREATE_ELSA_FILES_TABLE)
cursor.execute(CREATE_ELSA_MEDIA_TABLE)
# Helpful indices to speed up frequent lookups and joins
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id);"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted);"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_media_available ON media(available);"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at);"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id);"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr);"
)
conn.commit()
self.close_connection(conn)
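The CREATE INDEX IF NOT EXISTS statements are idempotent, so the table setup stays safe to run against an existing database. Whether a given query actually uses one of the new indices can be checked with EXPLAIN QUERY PLAN; a small sketch (table and column names follow the statements above, the database path and an already-initialized schema are assumptions):

# Sketch: check that a typical lookup hits idx_media_app_prof.
import sqlite3 as sql

conn = sql.connect("elsa.db")  # path is an assumption
rows = conn.execute(
    "EXPLAIN QUERY PLAN SELECT * FROM media WHERE app_id = ? AND prof_id = ?",
    (1, 1),
).fetchall()
for row in rows:
    print(row)  # expect 'SEARCH media USING INDEX idx_media_app_prof ...' in the detail column
conn.close()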
@@ -227,7 +248,7 @@ class Database:
"""
conn = self.connect()
cursor = conn.cursor()
log.debug(f"Inserting {params} into database with query {query}")
log.debug(f"Inserting into DB: {query}")
cursor.execute(query, params)
conn.commit()
self.close_connection(conn)
@@ -1650,7 +1671,7 @@ class Database:
tempdir.mkdir(parents=True, exist_ok=True)
file = tempfile.NamedTemporaryFile(
delete=False, dir=tempdir_path, mode="wb", suffix=f".{filetype}"
delete=False, dir=tempdir, mode="wb", suffix=f".{filetype}"
)
file.write(blob)
# log.debug("file created")
@@ -1713,9 +1734,9 @@ class Database:
telnr = profdata.telnr
title = profdata.title
query = f"INSERT INTO prof (fname, lname, fullname, mail, telnr,titel) VALUES ('{fname}','{lname}','{fullname}','{mail}','{telnr}','{title}')"
query = "INSERT INTO prof (fname, lname, fullname, mail, telnr, titel) VALUES (?,?,?,?,?,?)"
log.debug(query)
cursor.execute(query)
cursor.execute(query, (fname, lname, fullname, mail, telnr, title))
conn.commit()
conn.close()
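The switch from f-string SQL to ? placeholders here (and throughout this commit) lets sqlite3 bind the values itself, so names containing quotes no longer break the statement and input cannot change the shape of the SQL. The same pattern extends to bulk inserts with executemany; a standalone sketch with invented rows (database path and data are assumptions, the column list matches the INSERT above):

# Sketch: bulk insert with the same placeholder pattern.
import sqlite3 as sql

conn = sql.connect("elsa.db")  # path is an assumption
rows = [
    ("Ada", "Lovelace", "Ada Lovelace", "ada@example.org", "123", "Prof."),
    ("Alan", "Turing", "Alan Turing", "alan@example.org", "456", "Dr."),
]
conn.executemany(
    "INSERT INTO prof (fname, lname, fullname, mail, telnr, titel) VALUES (?,?,?,?,?,?)",
    rows,
)
conn.commit()
conn.close()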
@@ -1758,10 +1779,10 @@ class Database:
fullname = profdata["profname"]
else:
fullname = profdata.name()
query = f"SELECT id FROM prof WHERE fullname = '{fullname}'"
query = "SELECT id FROM prof WHERE fullname = ?"
log.debug(query)
cursor.execute(query)
cursor.execute(query, (fullname,))
result = cursor.fetchone()
if result:
return result[0]
@@ -1776,10 +1797,10 @@ class Database:
"""
conn = self.connect()
cursor = conn.cursor()
query = f"SELECT * FROM prof WHERE fullname = '{fullname}'"
query = "SELECT * FROM prof WHERE fullname = ?"
log.debug(query)
result = cursor.execute(query).fetchone()
result = cursor.execute(query, (fullname,)).fetchone()
if result:
return Prof().from_tuple(result)
else:
@@ -1795,8 +1816,8 @@ class Database:
int | None: The id of the prof or None if not found
"""
query = f"SELECT prof_id from semesterapparat WHERE appnr = '{apprarat_id}' and deletion_status = 0"
data = self.query_db(query)
query = "SELECT prof_id from semesterapparat WHERE appnr = ? and deletion_status = 0"
data = self.query_db(query, (apprarat_id,))
if data:
log.info("Prof ID: " + str(data[0][0]))
return data[0][0]
@@ -1807,20 +1828,13 @@ class Database:
# get book data
new_apparat_id = apparat
new_prof_id = self.getProfIDByApparat(new_apparat_id)
query = f"""
INSERT INTO media (bookdata, app_id, prof_id, deleted, available, reservation)
SELECT
bookdata,
'{new_apparat_id}',
'{new_prof_id}',
0,
available,
reservation
FROM media
where id = '{book_id}'"""
query = (
"INSERT INTO media (bookdata, app_id, prof_id, deleted, available, reservation) "
"SELECT bookdata, ?, ?, 0, available, reservation FROM media WHERE id = ?"
)
connection = self.connect()
cursor = connection.cursor()
cursor.execute(query)
cursor.execute(query, (new_apparat_id, new_prof_id, book_id))
connection.commit()
connection.close()
@@ -1832,16 +1846,18 @@ class Database:
appratat (int): the ID of the new apparat
"""
# get book data
query = f"UPDATE media SET app_id = '{appratat}' WHERE id = '{book_id}'"
query = "UPDATE media SET app_id = ? WHERE id = ?"
connection = self.connect()
cursor = connection.cursor()
cursor.execute(query)
cursor.execute(query, (appratat, book_id))
connection.commit()
connection.close()
def getApparatNameByAppNr(self, appnr: int):
query = f"SELECT name FROM semesterapparat WHERE appnr = '{appnr}' and deletion_status = 0"
data = self.query_db(query)
query = (
"SELECT name FROM semesterapparat WHERE appnr = ? and deletion_status = 0"
)
data = self.query_db(query, (appnr,))
if data:
return data[0][0]
else:
@@ -1856,8 +1872,8 @@ class Database:
return result
def getBookIdByPPN(self, ppn: str) -> int:
query = f"SELECT id FROM media WHERE bookdata LIKE '%{ppn}%'"
data = self.query_db(query)
query = "SELECT id FROM media WHERE bookdata LIKE ?"
data = self.query_db(query, (f"%{ppn}%",))
if data:
return data[0][0]
else:
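Binding the pattern as f"%{ppn}%" fixes the quoting problem, but % and _ inside the value itself still act as LIKE wildcards. PPNs are normally plain alphanumerics, so this is mostly theoretical; if it ever matters, SQLite's ESCAPE clause covers it. A sketch with a hypothetical helper (not part of this commit; it reuses the same query_db call as the method above):

# Hypothetical helper: neutralize LIKE wildcards in the bound value.
def like_contains(value: str) -> str:
    escaped = value.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")
    return f"%{escaped}%"

query = "SELECT id FROM media WHERE bookdata LIKE ? ESCAPE '\\'"
data = self.query_db(query, (like_contains(ppn),))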
@@ -1876,9 +1892,7 @@ class Database:
results = self.query_db(query, (apparat_id,))
res = []
for result in results:
old_edition_edition = self.query_db(
"SELECT bookdata FROM media WHERE id=?", (result[2],), one=True
)
# keep only new edition payload; old edition can be reconstructed if needed
res.append(BookData().from_string(result[1]))
return res

View File

@@ -1,20 +1,10 @@
import sys
import loguru
from PySide6.QtCore import QThread, Signal
from src import LOG_DIR
from src.backend import Database
from src.logic.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
log.add(sys.stdout, level="INFO")
# Logger configured centrally in main; this module just uses `log`
class BookGrabber(QThread):
@@ -37,7 +27,6 @@ class BookGrabber(QThread):
self.request = WebRequest()
self.db = Database()
def add_values(
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
):
@@ -50,7 +39,9 @@ class BookGrabber(QThread):
log.info(f"Working on {len(self.data)} entries")
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
log.debug("State: " + str(self.tstate))
app_nr = self.db.query_db("SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,))[0][0]
app_nr = self.db.query_db(
"SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,)
)[0][0]
self.request.set_apparat(app_nr)
# log.debug(self.tstate)

View File

@@ -1,37 +1,24 @@
import os
import re
import sys
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from math import ceil
from queue import Empty, Queue
from time import monotonic # <-- NEW
from typing import List, Optional
import loguru
from PySide6.QtCore import QThread, Signal
from src import LOG_DIR
# from src.logic.webrequest import BibTextTransformer, WebRequest
from src.backend.catalogue import Catalogue
from src.logic import BookData
from src.logic.SRU import SWB
from src.shared.logging import log
# use all available cores - 2, but at least 1
THREAD_COUNT = max(os.cpu_count() - 2, 1)
THREAD_MIN_ITEMS = 5
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
log.add(
f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
rotation="1 day",
retention="7 days",
)
# Logger configured centrally in main; use shared `log`
swb = SWB()
dnb = SWB()
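Side note on the unchanged context above: os.cpu_count() is documented to return None when the CPU count cannot be determined, in which case os.cpu_count() - 2 raises a TypeError. A defensive variant, purely as a sketch and not part of this commit:

# Sketch: treat an unknown CPU count as 2 so the result stays at least 1.
THREAD_COUNT = max((os.cpu_count() or 2) - 2, 1)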
@@ -146,7 +133,7 @@ def find_newer_edition(
if not deduped:
return None
# 3) Final pick (single best)
# 3) Preserve all qualifying newer editions, but order by preference
def sort_key(b: BookData):
year = b.year if b.year is not None else -1
ed = b.edition_number if b.edition_number is not None else -1
@@ -158,8 +145,8 @@ def find_newer_edition(
ed,
)
best = max(deduped, key=sort_key)
return [best] if best else None
deduped.sort(key=sort_key, reverse=True)
return deduped
class NewEditionCheckerThread(QThread):
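With this change, find_newer_edition returns every qualifying newer edition ordered best-first by the same key that previously picked the single winner, instead of a one-element list. Callers that only want the old behaviour can take the first entry; a sketch (the call signature is assumed, since it is cut off in the hunk header above):

# Sketch: relation between the new list result and the old single result.
candidates = find_newer_edition(book)  # signature assumed from the hunk header
if candidates:
    best = candidates[0]           # what max(deduped, key=sort_key) used to return
    alternatives = candidates[1:]  # further newer editions, in preference order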

View File

@@ -1,20 +1,10 @@
import sys
# from src.transformers import RDS_AVAIL_DATA
import loguru
# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal
from src import LOG_DIR
from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
from src.shared.logging import log
class AvailChecker(QThread):