rebase codebase, delete trunk, move threads to backend
@@ -1,12 +1,6 @@
from .dataclass import ApparatData, BookData, Prof, Apparat, ELSA
from .thread_bookgrabber import BookGrabber
from .threads_autoadder import AutoAdder
from .threads_availchecker import AvailChecker
from .c_sort import custom_sort, sort_semesters_list
from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
from .csvparser import csv_to_list
from .wordparser import elsa_word_to_csv, word_docx_to_csv
from .zotero import ZoteroController
@@ -1,5 +1,3 @@
APP_NRS = [i for i in range(1, 181)]

PROF_TITLES = [
@@ -1,8 +1,8 @@
import csv

import chardet


def csv_to_list(path: str) -> list[str]:
    """
    Extracts the data from a csv file and returns it as a list of strings
@@ -144,6 +144,7 @@ class Subjects(Enum):
            if i.name == name:
                return i.id - 1


@dataclass
class Apparat:
    id: int | None = None
@@ -162,20 +163,20 @@ class Apparat:
    konto: int | None = None

    def from_tuple(self, data: tuple):
        setattr(self, "id", data[0])
        setattr(self, "name", data[1])
        setattr(self, "prof_id", data[2])
        setattr(self, "subject", data[3])
        setattr(self, "appnr", data[4])
        setattr(self, "created_semester", data[5])
        setattr(self, "extended_at", data[6])
        setattr(self, "eternal", data[7])
        setattr(self, "extend_until", data[8])
        setattr(self, "deleted", data[9])
        setattr(self, "deleted_date", data[10])
        setattr(self, "apparat_id_adis", data[11])
        setattr(self, "prof_id_adis", data[12])
        setattr(self, "konto", data[13])
        self.id = data[0]
        self.name = data[1]
        self.prof_id = data[2]
        self.subject = data[3]
        self.appnr = data[4]
        self.created_semester = data[5]
        self.extended_at = data[6]
        self.eternal = data[7]
        self.extend_until = data[8]
        self.deleted = data[9]
        self.deleted_date = data[10]
        self.apparat_id_adis = data[11]
        self.prof_id_adis = data[12]
        self.konto = data[13]
        return self

    @property
@@ -194,11 +195,13 @@ class ELSA:
    prof_id: int | None = None

    def from_tuple(self, data):
        setattr(self, "id", data[0])
        setattr(self, "date", data[1])
        setattr(self, "semester", data[2])
        setattr(self, "prof_id", data[3])
        self.id = data[0]
        self.date = data[1]
        self.semester = data[2]
        self.prof_id = data[3]
        return self


@dataclass
class ApparatData:
    prof: Prof = field(default_factory=Prof)
@@ -1,4 +1,3 @@
import csv

import pandas as pd
@@ -6,7 +6,7 @@ paragraphs = wordDoc.tables
for table in paragraphs:
    for column in table.columns:
        cellcount = 0
        for cell in column.cells:
        for _cell in column.cells:
            if cellcount < 12:
                cellcount += 1
                # print(f"cell:{cell.text}")
@@ -1,4 +1,3 @@
from dataclasses import dataclass, field

import yaml
@@ -1,189 +0,0 @@
import sqlite3

from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal

from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest


class BookGrabber(QThread):
    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self, appnr):
        super(BookGrabber, self).__init__(parent=None)
        self.is_Running = True
        logger.info("Starting worker thread")
        self.data = None
        self.app_id = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.appnr = appnr
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)

    def add_values(self, app_id, prof_id, mode, data, any_book=False, exact=False):
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.use_any = any_book
        self.use_exact = exact
        logger.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
        logger.debug("State: " + str(self.tstate))
        # print(self.tstate)

    def run(self):
        self.db = Database()
        item = 0
        iterdata = self.data
        # print(iterdata)
        if self.prof_id is None:
            self.prof_id = self.db.getProfNameByApparat(self.app_id)
        for entry in iterdata:
            # print(entry)
            signature = str(entry)
            logger.info("Processing entry: " + signature)

            webdata = WebRequest().set_apparat(self.appnr).get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()

            if webdata == "error":
                continue

            bd = BibTextTransformer(self.mode)
            print(webdata)
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            print(bd)
            if bd is None:
                # bd = BookData
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )

            # confirm lock is acquired
            self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
            # get latest book id
            self.book_id = self.db.getLastBookId()
            logger.info("Added book to database")
            state = 0
            for result in transformer.RDS_DATA:
                # print(result.RDS_LOCATION)
                if str(self.app_id) in result.RDS_LOCATION:
                    state = 1
                    break

            logger.info(f"State of {signature}: {state}")
            # print("updating availability of " + str(self.book_id) + " to " + str(state))
            try:
                self.db.setAvailability(self.book_id, state)
            except sqlite3.OperationalError as e:
                logger.error(f"Failed to update availability: {e}")

            # time.sleep(5)
            item += 1
            self.updateSignal.emit(item, len(self.data))
        logger.info("Worker thread finished")
        # self.done.emit()
        self.quit()

    def stop(self):
        self.is_Running = False

# class BookGrabber(object):
# updateSignal = Signal(int, int)
# done = Signal()

# def __init__(self, app_id, prof_id, mode, data, parent=None):
# super(BookGrabber, self).__init__(parent=None)
# self.is_Running = True
# logger = MyLogger("Worker")
# logger.info("Starting worker thread")
# self.data = data
# logger.info(f"Working on {len(self.data)} entries")
# self.app_id = app_id
# self.prof_id = prof_id
# self.mode = mode
# self.book_id = None
# self.state = (self.app_id, self.prof_id, self.mode, self.data)
# # print(self.state)
# logger.info("state: " + str(self.state))
# # time.sleep(2)

# def resetValues(self):
# self.app_id = None
# self.prof_id = None
# self.mode = None
# self.data = None
# self.book_id = None

# def run(self):
# while self.is_Running:
# self.db = Database()
# item = 0
# iterdata = self.data
# # print(iterdata)
# for entry in iterdata:
# # print(entry)
# signature = str(entry)
# logger.info("Processing entry: " + signature)

# webdata = WebRequest().get_ppn(entry).get_data()
# if webdata == "error":
# continue
# bd = BibTextTransformer(self.mode).get_data(webdata).return_data()
# transformer = BibTextTransformer("RDS")
# rds = transformer.get_data(webdata).return_data("rds_availability")
# bd.signature = entry
# # confirm lock is acquired
# self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
# # get latest book id
# self.book_id = self.db.getLastBookId()
# logger.info("Added book to database")
# state = 0
# # print(len(rds.items))
# for rds_item in rds.items:
# sign = rds_item.superlocation
# loc = rds_item.location
# # logger.debug(sign, loc)
# # logger.debug(rds_item)
# if self.app_id in sign or self.app_id in loc:
# state = 1
# break

# logger.info(f"State of {signature}: {state}")
# # print(
# "updating availability of "
# + str(self.book_id)
# + " to "
# + str(state)
# )
# try:
# self.db.setAvailability(self.book_id, state)
# except sqlite3.OperationalError as e:
# logger.error(f"Failed to update availability: {e}")

# # time.sleep(5)
# item += 1
# self.updateSignal.emit(item, len(self.data))
# logger.info("Worker thread finished")
# # self.done.emit()
# self.stop()
# if not self.is_Running:
# break

# def stop(self):
# self.is_Running = False
@@ -1,51 +0,0 @@
import time

# from icecream import ic
from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal

from src.backend.database import Database

# from src.transformers import RDS_AVAIL_DATA


class AutoAdder(QThread):
    updateSignal = Signal(int)

    setTextSignal = Signal(int)
    progress = Signal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        super().__init__(parent)
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

        # print("Launched AutoAdder")
        # print(self.data, self.app_id, self.prof_id)

    def run(self):
        self.db = Database()
        # show the dialog, start the thread to gather data and dynamically update progressbar and listwidget
        logger.info("Starting worker thread")
        item = 0
        for entry in self.data:
            try:
                self.updateSignal.emit(item)
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                time.sleep(1)

            except Exception as e:
                # print(e)
                logger.exception(
                    f"The query failed with message {e} for signature {entry}"
                )
                continue
        if item == len(self.data):
            logger.info("Worker thread finished")
            # terminate thread
            self.finished.emit()
@@ -1,72 +0,0 @@
import time

# from icecream import ic
from PyQt6.QtCore import QThread
from PyQt6.QtCore import pyqtSignal as Signal

from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest

# from src.transformers import RDS_AVAIL_DATA


class AvailChecker(QThread):
    updateSignal = Signal(str, int)
    updateProgress = Signal(int, int)

    def __init__(
        self, links: list = None, appnumber: int = None, parent=None, books=list[dict]
    ):
        if links is None:
            links = []
        super().__init__(parent)
        logger.info("Starting worker thread")
        logger.info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books
        logger.info(
            f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
        )
        time.sleep(2)

    def run(self):
        self.db = Database()
        state = 0
        count = 0
        for link in self.links:
            logger.info("Processing entry: " + str(link))
            data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
            transformer = BibTextTransformer("RDS")
            rds = transformer.get_data(data).return_data("rds_availability")

            book_id = None
            for item in rds.items:
                sign = item.superlocation
                loc = item.location
                # # print(item.location)
                if self.appnumber in sign or self.appnumber in loc:
                    state = 1
                    break
            for book in self.books:
                if book["bookdata"].signature == link:
                    book_id = book["id"]
                    break
            logger.info(f"State of {link}: " + str(state))
            # print("Updating availability of " + str(book_id) + " to " + str(state))
            self.db.setAvailability(book_id, state)
            count += 1
            self.updateProgress.emit(count, len(self.links))
            self.updateSignal.emit(item.callnumber, state)

        logger.info("Worker thread finished")
        # terminate thread

        self.quit()
@@ -1,6 +1,7 @@
import requests
from bs4 import BeautifulSoup
from src import logger

# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry
@@ -27,11 +28,12 @@ HEADERS = {
RATE_LIMIT = 20
RATE_PERIOD = 30


class WebRequest:
    def __init__(self) -> None:
        """Request data from the web, and format it depending on the mode."""
        self.apparat = None
        self.use_any = False  # use any book that matches the search term
        self.signature = None
        self.ppn = None
        self.data = None
@@ -44,6 +46,7 @@ class WebRequest:
        self.use_any = True
        logger.info("Using any book")
        return self

    def set_apparat(self, apparat):
        self.apparat = apparat
        if int(self.apparat) < 10:
@@ -59,6 +62,7 @@ class WebRequest:
        signature = signature.split("/")[-1]
        self.ppn = signature
        return self

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_book(self, searchterm: str):
@@ -73,6 +77,7 @@ class WebRequest:
        for link in links:
            res.append(BASE + link["href"])
        return res

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search(self, link: str):
@@ -82,6 +87,7 @@ class WebRequest:
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed: {e}")
            return None

    def get_data(self):
        links = self.get_book_links(self.ppn)
        for link in links:
@@ -170,11 +176,11 @@ class BibTextTransformer:
        COinS_IDENT = "ctx_ver"
        BIBTEX_IDENT = "@book"
        RDS_IDENT = "RDS ---------------------------------- "

        if data is None:
            self.data = None
            return self

        if self.mode == "RIS":
            for line in data:
                if RIS_IDENT in line:
@@ -207,7 +213,7 @@ class BibTextTransformer:
            BookData: a dataclass containing data about the book
        """
        if self.data is None:
            return None
        match self.mode:
            case "ARRAY":
                return ARRAYData(self.signature).transform(self.data)
@@ -218,11 +224,10 @@
            case "RIS":
                return RISData().transform(self.data)
            case "RDS":
                return RDSData().transform(self.data).return_data(option)
            case None:
                return None

        # if self.mode == "ARRAY":
        # return ARRAYData().transform(self.data)
        # elif self.mode == "COinS":
@@ -252,4 +257,3 @@ if __name__ == "__main__":
    data = WebRequest(71).get_ppn(link).get_data()
    bib = BibTextTransformer("ARRAY").get_data().return_data()
    print(bib)
@@ -1,8 +1,9 @@
import pandas as pd
from docx import Document

letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"


def word_docx_to_csv(path) -> pd.DataFrame:
    doc = Document(path)
    tables = doc.tables
@@ -25,6 +26,8 @@ def word_docx_to_csv(path) -> pd.DataFrame:
    df = m_data[2]
    return df


def makeDict():
    return {
        "work_author": None,
@@ -42,6 +45,7 @@ def makeDict():
        "type": None,
    }


def tuple_to_dict(tlist: tuple, type: str) -> dict:
    ret = []
    for line in tlist:
@@ -82,6 +86,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
        ret.append(data)
    return ret


def elsa_word_to_csv(path):
    doc = Document(path)
    # # print all lines in doc
@@ -119,4 +124,4 @@ def elsa_word_to_csv(path):

if __name__ == "__main__":
    else_df = elsa_word_to_csv("C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx")
    # print(else_df)
@@ -1,11 +1,9 @@
from pyzotero import zotero
from dataclasses import dataclass
from src.logic.webrequest import WebRequest, BibTextTransformer
from src import settings


@dataclass
class Creator:
    firstName: str = None
@@ -160,6 +158,7 @@ class JournalArticle:

class ZoteroController:
    zoterocfg = settings.zotero

    def __init__(self):
        self.zot = zotero.Zotero(
            self.zoterocfg.library_id,