rebase codebase, delete trunk, move threads to backend

This commit is contained in:
2025-01-14 16:20:08 +01:00
parent fba652006f
commit 08cd18f3f1
106 changed files with 1604 additions and 1057 deletions

View File

@@ -1,12 +1,6 @@
from .dataclass import ApparatData, BookData, Prof, Apparat, ELSA
from .thread_bookgrabber import BookGrabber
from .threads_autoadder import AutoAdder
from .threads_availchecker import AvailChecker
from .c_sort import custom_sort, sort_semesters_list
from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
from .csvparser import csv_to_list
from .wordparser import elsa_word_to_csv, word_docx_to_csv
from .zotero import ZoteroController

View File

@@ -1,5 +1,3 @@
# The integers 1 through 180 (inclusive), in ascending order.
APP_NRS = list(range(1, 181))
PROF_TITLES = [

View File

@@ -1,8 +1,8 @@
import csv
import chardet
def csv_to_list(path: str) -> list[str]:
"""
Extracts the data from a csv file and returns it as a pandas dataframe

View File

@@ -144,6 +144,7 @@ class Subjects(Enum):
if i.name == name:
return i.id - 1
@dataclass
class Apparat:
id: int | None = None
@@ -162,20 +163,20 @@ class Apparat:
konto: int | None = None
def from_tuple(self, data: tuple):
    """Fill this instance from a positional tuple and return it.

    Items 0 to 13 of ``data`` are copied, in this order, to ``id``,
    ``name``, ``prof_id``, ``subject``, ``appnr``, ``created_semester``,
    ``extended_at``, ``eternal``, ``extend_until``, ``deleted``,
    ``deleted_date``, ``apparat_id_adis``, ``prof_id_adis`` and ``konto``.

    Args:
        data: Indexable sequence holding at least 14 items.

    Returns:
        The same instance, so the call can be chained.

    Raises:
        IndexError: If ``data`` holds fewer than 14 items. Attributes
            assigned before the missing index keep their new values.
    """
    # Each field is assigned exactly once; explicit indexing keeps the
    # IndexError behaviour for tuples that are too short.
    self.id = data[0]
    self.name = data[1]
    self.prof_id = data[2]
    self.subject = data[3]
    self.appnr = data[4]
    self.created_semester = data[5]
    self.extended_at = data[6]
    self.eternal = data[7]
    self.extend_until = data[8]
    self.deleted = data[9]
    self.deleted_date = data[10]
    self.apparat_id_adis = data[11]
    self.prof_id_adis = data[12]
    self.konto = data[13]
    return self
@property
@@ -194,11 +195,13 @@ class ELSA:
prof_id: int | None = None
def from_tuple(self, data):
    """Fill this instance from a positional tuple and return it.

    Items 0 to 3 of ``data`` are copied, in this order, to ``id``,
    ``date``, ``semester`` and ``prof_id``.

    Args:
        data: Indexable sequence holding at least 4 items.

    Returns:
        The same instance, so the call can be chained.

    Raises:
        IndexError: If ``data`` holds fewer than 4 items.
    """
    # Each field is assigned exactly once.
    self.id = data[0]
    self.date = data[1]
    self.semester = data[2]
    self.prof_id = data[3]
    return self
@dataclass
class ApparatData:
prof: Prof = field(default_factory=Prof)

View File

@@ -1,4 +1,3 @@
import csv
import pandas as pd

View File

@@ -6,7 +6,7 @@ paragraphs = wordDoc.tables
for table in paragraphs:
for column in table.columns:
cellcount = 0
for cell in column.cells:
for _cell in column.cells:
if cellcount < 12:
cellcount += 1
# print(f"cell:{cell.text}")

View File

@@ -1,4 +1,3 @@
from dataclasses import dataclass, field
import yaml

View File

@@ -1,189 +0,0 @@
import sqlite3
from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal
from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest
class BookGrabber(QThread):
    """Worker thread that looks up a list of entries and stores them as books.

    For every entry handed over through ``add_values`` the thread queries
    ``WebRequest``, converts the response with ``BibTextTransformer``,
    writes the result to the database and records an availability flag.

    Signals:
        updateSignal(int, int): Emitted after each stored book with the
            number of books stored so far and the total number of entries.
        done: Declared for consumers; not emitted by this class.

    NOTE(review): ``logger`` is used here but is not among the imports shown
    for this module - confirm it is provided elsewhere.
    NOTE(review): the block nesting of ``run`` was reconstructed from the
    statements alone; verify the extent of the ``mode == "ARRAY"`` branch.
    """

    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self, appnr):
        """Create an idle worker for apparat number ``appnr``.

        The work items are supplied later through ``add_values``.
        """
        super().__init__(parent=None)
        self.is_Running = True
        logger.info("Starting worker thread")
        self.data = None
        self.app_id = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.appnr = appnr
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)

    def add_values(self, app_id, prof_id, mode, data, any_book=False, exact=False):
        """Store the parameters for the next run.

        Args:
            app_id: Identifier passed to the database calls and searched for
                in the availability locations.
            prof_id: Identifier passed to ``addBookToDatabase``; when ``None``
                it is looked up in ``run``.
            mode: Mode handed to ``BibTextTransformer``.
            data: Sized iterable of entries to process.
            any_book: When true, ``use_any_book`` is applied to the request.
            exact: When true and ``mode`` is ``"ARRAY"``, the entry is passed
                to the transformer through ``use_signature``.
        """
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.use_any = any_book
        self.use_exact = exact
        logger.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
        logger.debug("State: " + str(self.tstate))

    def run(self):
        """Process every entry in ``self.data`` until done or stopped.

        Entries whose lookup yields ``"error"`` or whose transformation
        yields ``None`` are skipped without emitting ``updateSignal``.
        """
        self.db = Database()
        stored = 0
        entries = self.data
        total = len(entries)
        if self.prof_id is None:
            # NOTE(review): the helper is named ...ProfName... but its result
            # is used as prof_id - confirm what it returns.
            self.prof_id = self.db.getProfNameByApparat(self.app_id)
        for entry in entries:
            # Honour stop(): the flag is checked once per entry, so a running
            # lookup is finished before the thread leaves the loop.
            if not self.is_Running:
                logger.info("Worker thread stopped on request")
                break
            signature = str(entry)
            logger.info("Processing entry: " + signature)
            webdata = WebRequest().set_apparat(self.appnr).get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()
            if webdata == "error":
                continue
            bd = BibTextTransformer(self.mode)
            logger.debug(f"Web data for {signature}: {webdata}")
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            logger.debug(f"Book data for {signature}: {bd}")
            if bd is None:
                continue
            bd.signature = entry
            availability = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )
            self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
            # The id of the row that was just inserted.
            self.book_id = self.db.getLastBookId()
            logger.info("Added book to database")
            state = 0
            # return_data() may hand back None; treat that as "not available".
            if availability is not None:
                wanted = str(self.app_id)
                for result in availability.RDS_DATA:
                    if wanted in result.RDS_LOCATION:
                        state = 1
                        break
            logger.info(f"State of {signature}: {state}")
            try:
                self.db.setAvailability(self.book_id, state)
            except sqlite3.OperationalError as e:
                logger.error(f"Failed to update availability: {e}")
            stored += 1
            self.updateSignal.emit(stored, total)
        logger.info("Worker thread finished")
        self.quit()

    def stop(self):
        """Ask ``run`` to leave its loop before the next entry.

        The flag is not reset afterwards, so a stopped instance processes no
        further entries if it is started again.
        """
        self.is_Running = False
# class BookGrabber(object):
# updateSignal = Signal(int, int)
# done = Signal()
# def __init__(self, app_id, prof_id, mode, data, parent=None):
# super(BookGrabber, self).__init__(parent=None)
# self.is_Running = True
# logger = MyLogger("Worker")
# logger.info("Starting worker thread")
# self.data = data
# logger.info(f"Working on {len(self.data)} entries")
# self.app_id = app_id
# self.prof_id = prof_id
# self.mode = mode
# self.book_id = None
# self.state = (self.app_id, self.prof_id, self.mode, self.data)
# # print(self.state)
# logger.info("state: " + str(self.state))
# # time.sleep(2)
# def resetValues(self):
# self.app_id = None
# self.prof_id = None
# self.mode = None
# self.data = None
# self.book_id = None
# def run(self):
# while self.is_Running:
# self.db = Database()
# item = 0
# iterdata = self.data
# # print(iterdata)
# for entry in iterdata:
# # print(entry)
# signature = str(entry)
# logger.info("Processing entry: " + signature)
# webdata = WebRequest().get_ppn(entry).get_data()
# if webdata == "error":
# continue
# bd = BibTextTransformer(self.mode).get_data(webdata).return_data()
# transformer = BibTextTransformer("RDS")
# rds = transformer.get_data(webdata).return_data("rds_availability")
# bd.signature = entry
# # confirm lock is acquired
# self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
# # get latest book id
# self.book_id = self.db.getLastBookId()
# logger.info("Added book to database")
# state = 0
# # print(len(rds.items))
# for rds_item in rds.items:
# sign = rds_item.superlocation
# loc = rds_item.location
# # logger.debug(sign, loc)
# # logger.debug(rds_item)
# if self.app_id in sign or self.app_id in loc:
# state = 1
# break
# logger.info(f"State of {signature}: {state}")
# # print(
# #     "updating availability of "
# #     + str(self.book_id)
# #     + " to "
# #     + str(state)
# # )
# try:
# self.db.setAvailability(self.book_id, state)
# except sqlite3.OperationalError as e:
# logger.error(f"Failed to update availability: {e}")
# # time.sleep(5)
# item += 1
# self.updateSignal.emit(item, len(self.data))
# logger.info("Worker thread finished")
# # self.done.emit()
# self.stop()
# if not self.is_Running:
# break
# def stop(self):
# self.is_Running = False

View File

@@ -1,51 +0,0 @@
import time
# from icecream import ic
from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal
from src.backend.database import Database
# from src.transformers import RDS_AVAIL_DATA
class AutoAdder(QThread):
    """Worker thread that walks over ``data`` and reports progress via signals.

    Signals:
        updateSignal(int): Emitted with the zero-based count before an entry
            is handled.
        setTextSignal(int): Emitted with the entry itself.
        progress(int): Emitted with the number of entries handled so far.

    NOTE(review): ``logger`` is used here but is not among the imports shown
    for this module - confirm it is provided elsewhere.
    """

    updateSignal = Signal(int)
    setTextSignal = Signal(int)
    progress = Signal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        """Store the work items and identifiers; no work is started here."""
        super().__init__(parent)
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

    def run(self):
        """Emit the progress signals for each entry, pausing 1 s per entry.

        A failure while handling one entry is logged and the loop moves on;
        that entry is not counted in ``progress``.
        """
        self.db = Database()
        logger.info("Starting worker thread")
        # ``data`` defaults to None in __init__; treat that as "nothing to do"
        # instead of failing on iteration and len().
        entries = self.data if self.data is not None else []
        item = 0
        for entry in entries:
            try:
                self.updateSignal.emit(item)
                # NOTE(review): setTextSignal is declared as Signal(int) while
                # the log message below calls the entry a signature - confirm
                # the entries really are ints.
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                time.sleep(1)
            except Exception as e:
                logger.exception(
                    f"The query failed with message {e} for signature {entry}"
                )
        if item == len(entries):
            logger.info("Worker thread finished")
        # No manual finished.emit(): QThread emits ``finished`` itself once
        # run() returns, so emitting it here would notify listeners twice.

View File

@@ -1,72 +0,0 @@
import time
# from icecream import ic
from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal
from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest
# from src.transformers import RDS_AVAIL_DATA
class AvailChecker(QThread):
    """Worker thread that refreshes the availability flag for a list of links.

    Signals:
        updateSignal(str, int): Emitted once per link with a call number and
            the availability state (1 when the apparat number was found in a
            location, otherwise 0).
        updateProgress(int, int): Emitted once per link with the number of
            links handled so far and the total number of links.

    NOTE(review): ``logger`` is used here but is not among the imports shown
    for this module - confirm it is provided elsewhere.
    """

    updateSignal = Signal(str, int)
    updateProgress = Signal(int, int)

    def __init__(
        self,
        links: list = None,
        appnumber: int = None,
        parent=None,
        books: list[dict] | None = None,
    ):
        """Store the work items.

        Args:
            links: Entries to check; ``None`` means no entries.
            appnumber: Apparat number passed to ``WebRequest.set_apparat`` and
                searched for in the item locations.
            parent: Optional Qt parent.
            books: Dicts with the keys ``"id"`` and ``"bookdata"``; ``None``
                means no books. The default used to be the type expression
                ``list[dict]``, which made ``len(self.books)`` fail.
        """
        if links is None:
            links = []
        if books is None:
            books = []
        super().__init__(parent)
        logger.info("Starting worker thread")
        logger.info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books
        logger.info(
            f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
        )
        # NOTE(review): this pause blocks the thread that constructs the
        # worker, not the worker itself - confirm it is still needed.
        time.sleep(2)

    def run(self):
        """Check every link and write its availability state to the database."""
        self.db = Database()
        total = len(self.links)
        # Compare as text, so an int apparat number can be searched for in
        # string locations.
        wanted = str(self.appnumber)
        for count, link in enumerate(self.links, start=1):
            logger.info("Processing entry: " + str(link))
            data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
            transformer = BibTextTransformer("RDS")
            rds = transformer.get_data(data).return_data("rds_availability")
            # Reset per link; otherwise one hit would mark every later link
            # as available too.
            state = 0
            # Used when there are no items to take a call number from.
            # NOTE(review): assumes the link is an acceptable stand-in for
            # the call number - confirm against the connected slot.
            callnumber = str(link)
            # return_data() may hand back None; treat that as "no items".
            items = rds.items if rds is not None else []
            for item in items:
                callnumber = item.callnumber
                if wanted in item.superlocation or wanted in item.location:
                    state = 1
                    break
            book_id = None
            for book in self.books:
                if book["bookdata"].signature == link:
                    book_id = book["id"]
                    break
            logger.info(f"State of {link}: " + str(state))
            if book_id is None:
                logger.warning(f"No book found for {link}, availability not stored")
            else:
                self.db.setAvailability(book_id, state)
            self.updateProgress.emit(count, total)
            self.updateSignal.emit(callnumber, state)
        logger.info("Worker thread finished")
        self.quit()

View File

@@ -1,6 +1,7 @@
import requests
from bs4 import BeautifulSoup
from src import logger
# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry
@@ -27,11 +28,12 @@ HEADERS = {
RATE_LIMIT = 20
RATE_PERIOD = 30
class WebRequest:
def __init__(self) -> None:
"""Request data from the web, and format it depending on the mode."""
self.apparat = None
self.use_any = False # use any book that matches the search term
self.use_any = False # use any book that matches the search term
self.signature = None
self.ppn = None
self.data = None
@@ -44,6 +46,7 @@ class WebRequest:
self.use_any = True
logger.info("Using any book")
return self
def set_apparat(self, apparat):
self.apparat = apparat
if int(self.apparat) < 10:
@@ -59,6 +62,7 @@ class WebRequest:
signature = signature.split("/")[-1]
self.ppn = signature
return self
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
def search_book(self, searchterm: str):
@@ -73,6 +77,7 @@ class WebRequest:
for link in links:
res.append(BASE + link["href"])
return res
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
def search(self, link: str):
@@ -82,6 +87,7 @@ class WebRequest:
except requests.exceptions.RequestException as e:
logger.error(f"Request failed: {e}")
return None
def get_data(self):
links = self.get_book_links(self.ppn)
for link in links:
@@ -170,11 +176,11 @@ class BibTextTransformer:
COinS_IDENT = "ctx_ver"
BIBTEX_IDENT = "@book"
RDS_IDENT = "RDS ---------------------------------- "
if data is None:
self.data = None
return self
if self.mode == "RIS":
for line in data:
if RIS_IDENT in line:
@@ -207,7 +213,7 @@ class BibTextTransformer:
BookData: a dataclass containing data about the book
"""
if self.data is None:
return None
return None
match self.mode:
case "ARRAY":
return ARRAYData(self.signature).transform(self.data)
@@ -218,11 +224,10 @@ class BibTextTransformer:
case "RIS":
return RISData().transform(self.data)
case "RDS":
return RDSData().transform(self.data).return_data(option)
return RDSData().transform(self.data).return_data(option)
case None:
return None
# if self.mode == "ARRAY":
# return ARRAYData().transform(self.data)
# elif self.mode == "COinS":
@@ -252,4 +257,3 @@ if __name__ == "__main__":
data = WebRequest(71).get_ppn(link).get_data()
bib = BibTextTransformer("ARRAY").get_data().return_data()
print(bib)

View File

@@ -1,8 +1,9 @@
import pandas as pd
from docx import Document
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
def word_docx_to_csv(path) -> pd.DataFrame:
doc = Document(path)
tables = doc.tables
@@ -25,6 +26,8 @@ def word_docx_to_csv(path) -> pd.DataFrame:
df = m_data[2]
return df
def makeDict():
return {
"work_author": None,
@@ -42,6 +45,7 @@ def makeDict():
"type": None,
}
def tuple_to_dict(tlist: tuple, type: str) -> dict:
ret = []
for line in tlist:
@@ -82,6 +86,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
ret.append(data)
return ret
def elsa_word_to_csv(path):
doc = Document(path)
# # print all lines in doc
@@ -119,4 +124,4 @@ def elsa_word_to_csv(path):
if __name__ == "__main__":
else_df = elsa_word_to_csv("C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx")
# print(else_df)
# print(else_df)

View File

@@ -1,11 +1,9 @@
from pyzotero import zotero
from dataclasses import dataclass
from src.logic.webrequest import WebRequest, BibTextTransformer
from src import settings
@dataclass
class Creator:
firstName: str = None
@@ -160,6 +158,7 @@ class JournalArticle:
class ZoteroController:
zoterocfg = settings.zotero
def __init__(self):
self.zot = zotero.Zotero(
self.zoterocfg.library_id,