This commit is contained in:
@@ -1,5 +1,3 @@
|
|||||||
import sqlite3
|
|
||||||
|
|
||||||
from PyQt6.QtCore import QThread
|
from PyQt6.QtCore import QThread
|
||||||
from PyQt6.QtCore import pyqtSignal as Signal
|
from PyQt6.QtCore import pyqtSignal as Signal
|
||||||
from src.backend import Database
|
from src.backend import Database
|
||||||
@@ -25,19 +23,20 @@ class BookGrabber(QThread):
|
|||||||
updateSignal = Signal(int, int)
|
updateSignal = Signal(int, int)
|
||||||
done = Signal()
|
done = Signal()
|
||||||
|
|
||||||
def __init__(self, appnr: int):
|
def __init__(self):
|
||||||
super(BookGrabber, self).__init__(parent=None)
|
super(BookGrabber, self).__init__(parent=None)
|
||||||
self.is_Running = True
|
self.is_Running = True
|
||||||
logger.info("Starting worker thread")
|
logger.info("Starting worker thread")
|
||||||
self.data = None
|
self.data = []
|
||||||
self.app_id = None
|
self.app_id = None
|
||||||
self.prof_id = None
|
self.prof_id = None
|
||||||
self.mode = None
|
self.mode = None
|
||||||
self.book_id = None
|
self.book_id = None
|
||||||
self.use_any = False
|
self.use_any = False
|
||||||
self.use_exact = False
|
self.use_exact = False
|
||||||
self.appnr = appnr
|
self.app_id = None
|
||||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||||
|
self.request = WebRequest()
|
||||||
|
|
||||||
def add_values(
|
def add_values(
|
||||||
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||||
@@ -45,27 +44,26 @@ class BookGrabber(QThread):
|
|||||||
self.app_id = app_id
|
self.app_id = app_id
|
||||||
self.prof_id = prof_id
|
self.prof_id = prof_id
|
||||||
self.mode = mode
|
self.mode = mode
|
||||||
self.data = data
|
self.data: list[str] = data
|
||||||
self.use_any = any_book
|
self.use_any = any_book
|
||||||
self.use_exact = exact
|
self.use_exact = exact
|
||||||
logger.info(f"Working on {len(self.data)} entries")
|
logger.info(f"Working on {len(self.data)} entries")
|
||||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||||
logger.debug("State: " + str(self.tstate))
|
logger.debug("State: " + str(self.tstate))
|
||||||
# print(self.tstate)
|
self.request.set_apparat(self.app_id)
|
||||||
|
# logger.debug(self.tstate)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.db = Database()
|
self.db = Database()
|
||||||
item = 0
|
item = 0
|
||||||
iterdata = self.data
|
iterdata = self.data
|
||||||
# print(iterdata)
|
# logger.debug(iterdata)
|
||||||
if self.prof_id is None:
|
|
||||||
self.prof_id = self.db.getProfNameByApparat(self.app_id)
|
|
||||||
for entry in iterdata:
|
|
||||||
# print(entry)
|
|
||||||
signature = str(entry)
|
|
||||||
logger.info("Processing entry: " + signature)
|
|
||||||
|
|
||||||
webdata = WebRequest().set_apparat(self.appnr).get_ppn(entry)
|
for entry in iterdata:
|
||||||
|
# logger.debug(entry)
|
||||||
|
logger.info("Processing entry: {}", entry)
|
||||||
|
|
||||||
|
webdata = self.request.get_ppn(entry)
|
||||||
if self.use_any:
|
if self.use_any:
|
||||||
webdata = webdata.use_any_book
|
webdata = webdata.use_any_book
|
||||||
webdata = webdata.get_data()
|
webdata = webdata.get_data()
|
||||||
@@ -74,12 +72,12 @@ class BookGrabber(QThread):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
bd = BibTextTransformer(self.mode)
|
bd = BibTextTransformer(self.mode)
|
||||||
print(webdata)
|
logger.debug(webdata)
|
||||||
if self.mode == "ARRAY":
|
if self.mode == "ARRAY":
|
||||||
if self.use_exact:
|
if self.use_exact:
|
||||||
bd = bd.use_signature(entry)
|
bd = bd.use_signature(entry)
|
||||||
bd = bd.get_data(webdata).return_data()
|
bd = bd.get_data(webdata).return_data()
|
||||||
print(bd)
|
logger.debug(bd)
|
||||||
if bd is None:
|
if bd is None:
|
||||||
# bd = BookData
|
# bd = BookData
|
||||||
continue
|
continue
|
||||||
@@ -95,19 +93,21 @@ class BookGrabber(QThread):
|
|||||||
logger.info("Added book to database")
|
logger.info("Added book to database")
|
||||||
state = 0
|
state = 0
|
||||||
for result in transformer.RDS_DATA:
|
for result in transformer.RDS_DATA:
|
||||||
# print(result.RDS_LOCATION)
|
# logger.debug(result.RDS_LOCATION)
|
||||||
if str(self.app_id) in result.RDS_LOCATION:
|
if str(self.app_id) in result.RDS_LOCATION:
|
||||||
state = 1
|
state = 1
|
||||||
break
|
break
|
||||||
|
|
||||||
logger.info(f"State of {signature}: {state}")
|
logger.info(f"State of {entry}: {state}")
|
||||||
print("updating availability of " + str(self.book_id) + " to " + str(state))
|
logger.debug(
|
||||||
|
"updating availability of " + str(self.book_id) + " to " + str(state)
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
self.db.setAvailability(self.book_id, state)
|
self.db.setAvailability(self.book_id, state)
|
||||||
print("Added book to database")
|
logger.debug("Added book to database")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to update availability: {e}")
|
logger.error(f"Failed to update availability: {e}")
|
||||||
print("Failed to update availability: " + str(e))
|
logger.debug("Failed to update availability: " + str(e))
|
||||||
|
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
item += 1
|
item += 1
|
||||||
@@ -135,7 +135,7 @@ class BookGrabberTest(QThread):
|
|||||||
self.book_id = None
|
self.book_id = None
|
||||||
self.use_any = False
|
self.use_any = False
|
||||||
self.use_exact = False
|
self.use_exact = False
|
||||||
self.appnr = appnr
|
self.app_id = appnr
|
||||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||||
self.results = []
|
self.results = []
|
||||||
|
|
||||||
@@ -151,18 +151,18 @@ class BookGrabberTest(QThread):
|
|||||||
logger.info(f"Working on {len(self.data)} entries")
|
logger.info(f"Working on {len(self.data)} entries")
|
||||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||||
logger.debug("State: " + str(self.tstate))
|
logger.debug("State: " + str(self.tstate))
|
||||||
# print(self.tstate)
|
# logger.debug(self.tstate)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
item = 0
|
item = 0
|
||||||
iterdata = self.data
|
iterdata = self.data
|
||||||
# print(iterdata)
|
# logger.debug(iterdata)
|
||||||
for entry in iterdata:
|
for entry in iterdata:
|
||||||
# print(entry)
|
# logger.debug(entry)
|
||||||
signature = str(entry)
|
signature = str(entry)
|
||||||
logger.info("Processing entry: " + signature)
|
logger.info("Processing entry: " + signature)
|
||||||
|
|
||||||
webdata = WebRequest().set_apparat(self.appnr).get_ppn(entry)
|
webdata = WebRequest().set_apparat(self.app_id).get_ppn(entry)
|
||||||
if self.use_any:
|
if self.use_any:
|
||||||
webdata = webdata.use_any_book
|
webdata = webdata.use_any_book
|
||||||
webdata = webdata.get_data()
|
webdata = webdata.get_data()
|
||||||
@@ -188,13 +188,13 @@ class BookGrabberTest(QThread):
|
|||||||
logger.info("Added book to database")
|
logger.info("Added book to database")
|
||||||
state = 0
|
state = 0
|
||||||
for result in transformer.RDS_DATA:
|
for result in transformer.RDS_DATA:
|
||||||
# print(result.RDS_LOCATION)
|
# logger.debug(result.RDS_LOCATION)
|
||||||
if str(self.app_id) in result.RDS_LOCATION:
|
if str(self.app_id) in result.RDS_LOCATION:
|
||||||
state = 1
|
state = 1
|
||||||
break
|
break
|
||||||
|
|
||||||
logger.info(f"State of {signature}: {state}")
|
logger.info(f"State of {signature}: {state}")
|
||||||
# print("updating availability of " + str(self.book_id) + " to " + str(state))
|
# logger.debug("updating availability of " + str(self.book_id) + " to " + str(state))
|
||||||
self.results.append(bd)
|
self.results.append(bd)
|
||||||
|
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
|
|||||||
|
|
||||||
# import sleep_and_retry decorator to retry requests
|
# import sleep_and_retry decorator to retry requests
|
||||||
from ratelimit import limits, sleep_and_retry
|
from ratelimit import limits, sleep_and_retry
|
||||||
|
from typing import Union, Any
|
||||||
from src.logic.dataclass import BookData
|
from src.logic.dataclass import BookData
|
||||||
|
|
||||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||||
@@ -61,14 +61,14 @@ class WebRequest:
|
|||||||
logger.info("Using any book")
|
logger.info("Using any book")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def set_apparat(self, apparat):
|
def set_apparat(self, apparat: int):
|
||||||
self.apparat = apparat
|
self.apparat = apparat
|
||||||
if int(self.apparat) < 10:
|
if int(self.apparat) < 10:
|
||||||
self.apparat = f"0{self.apparat}"
|
self.apparat = f"0{self.apparat}"
|
||||||
logger.info(f"Set apparat to {self.apparat}")
|
logger.info(f"Set apparat to {self.apparat}")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def get_ppn(self, signature):
|
def get_ppn(self, signature: str):
|
||||||
self.signature = signature
|
self.signature = signature
|
||||||
if "+" in signature:
|
if "+" in signature:
|
||||||
signature = signature.replace("+", "%2B")
|
signature = signature.replace("+", "%2B")
|
||||||
@@ -79,15 +79,15 @@ class WebRequest:
|
|||||||
|
|
||||||
@sleep_and_retry
|
@sleep_and_retry
|
||||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||||
def search_book(self, searchterm: str):
|
def search_book(self, searchterm: str) -> str:
|
||||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||||
return response.text
|
return response.text
|
||||||
|
|
||||||
def get_book_links(self, searchterm: str):
|
def get_book_links(self, searchterm: str) -> list[str]:
|
||||||
response = self.search_book(searchterm)
|
response: str = self.search_book(searchterm) # type:ignore
|
||||||
soup = BeautifulSoup(response, "html.parser")
|
soup = BeautifulSoup(response, "html.parser")
|
||||||
links = soup.find_all("a", class_="title getFull")
|
links = soup.find_all("a", class_="title getFull")
|
||||||
res = []
|
res: list[str] = []
|
||||||
for link in links:
|
for link in links:
|
||||||
res.append(BASE + link["href"])
|
res.append(BASE + link["href"])
|
||||||
return res
|
return res
|
||||||
@@ -102,10 +102,11 @@ class WebRequest:
|
|||||||
logger.error(f"Request failed: {e}")
|
logger.error(f"Request failed: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self) -> Union[list[str], None]:
|
||||||
links = self.get_book_links(self.ppn)
|
links = self.get_book_links(self.ppn)
|
||||||
|
logger.debug(f"Links: {links}")
|
||||||
for link in links:
|
for link in links:
|
||||||
result = self.search(link)
|
result: str = self.search(link) # type:ignore
|
||||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||||
# if found, return text of href
|
# if found, return text of href
|
||||||
soup = BeautifulSoup(result, "html.parser")
|
soup = BeautifulSoup(result, "html.parser")
|
||||||
@@ -117,7 +118,7 @@ class WebRequest:
|
|||||||
).text.strip()
|
).text.strip()
|
||||||
if self.use_any:
|
if self.use_any:
|
||||||
pre_tag = soup.find_all("pre")
|
pre_tag = soup.find_all("pre")
|
||||||
return_data = []
|
return_data: list[str] = []
|
||||||
if pre_tag:
|
if pre_tag:
|
||||||
for tag in pre_tag:
|
for tag in pre_tag:
|
||||||
data = tag.text.strip()
|
data = tag.text.strip()
|
||||||
@@ -126,7 +127,7 @@ class WebRequest:
|
|||||||
else:
|
else:
|
||||||
logger.error("No <pre> tag found")
|
logger.error("No <pre> tag found")
|
||||||
raise ValueError("No <pre> tag found")
|
raise ValueError("No <pre> tag found")
|
||||||
if f"Semesterapparat-{self.apparat}" in item_location:
|
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||||
pre_tag = soup.find_all("pre")
|
pre_tag = soup.find_all("pre")
|
||||||
return_data = []
|
return_data = []
|
||||||
if pre_tag:
|
if pre_tag:
|
||||||
@@ -137,6 +138,13 @@ class WebRequest:
|
|||||||
else:
|
else:
|
||||||
logger.error("No <pre> tag found")
|
logger.error("No <pre> tag found")
|
||||||
return return_data
|
return return_data
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
f"Signature {self.signature} not found in {item_location}"
|
||||||
|
)
|
||||||
|
return_data = []
|
||||||
|
|
||||||
|
return return_data
|
||||||
|
|
||||||
def get_data_elsa(self):
|
def get_data_elsa(self):
|
||||||
links = self.get_book_links(self.ppn)
|
links = self.get_book_links(self.ppn)
|
||||||
@@ -184,7 +192,7 @@ class BibTextTransformer:
|
|||||||
self.signature = signature
|
self.signature = signature
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def get_data(self, data: list):
|
def get_data(self, data: Union[list[str]] = None) -> "BibTextTransformer":
|
||||||
RIS_IDENT = "TY -"
|
RIS_IDENT = "TY -"
|
||||||
ARRAY_IDENT = "[kid]"
|
ARRAY_IDENT = "[kid]"
|
||||||
COinS_IDENT = "ctx_ver"
|
COinS_IDENT = "ctx_ver"
|
||||||
@@ -217,7 +225,7 @@ class BibTextTransformer:
|
|||||||
self.data = line
|
self.data = line
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def return_data(self, option=None) -> BookData:
|
def return_data(self, option: Any = None) -> Union[BookData, None]:
|
||||||
"""Return Data to caller.
|
"""Return Data to caller.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -239,7 +247,7 @@ class BibTextTransformer:
|
|||||||
return RISData().transform(self.data)
|
return RISData().transform(self.data)
|
||||||
case "RDS":
|
case "RDS":
|
||||||
return RDSData().transform(self.data).return_data(option)
|
return RDSData().transform(self.data).return_data(option)
|
||||||
case None:
|
case _:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# if self.mode == "ARRAY":
|
# if self.mode == "ARRAY":
|
||||||
@@ -256,7 +264,7 @@ class BibTextTransformer:
|
|||||||
|
|
||||||
def cover(isbn):
|
def cover(isbn):
|
||||||
test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
|
test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
|
||||||
# print(test_url)
|
# logger.debug(test_url)
|
||||||
data = requests.get(test_url, stream=True)
|
data = requests.get(test_url, stream=True)
|
||||||
return data.content
|
return data.content
|
||||||
|
|
||||||
@@ -266,8 +274,8 @@ def get_content(soup, css_class):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# print("main")
|
# logger.debug("main")
|
||||||
link = "CU 8500 K64"
|
link = "CU 8500 K64"
|
||||||
data = WebRequest(71).get_ppn(link).get_data()
|
data = WebRequest(71).get_ppn(link).get_data()
|
||||||
bib = BibTextTransformer("ARRAY").get_data().return_data()
|
bib = BibTextTransformer("ARRAY").get_data().return_data()
|
||||||
print(bib)
|
logger.debug(bib)
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class Book:
|
|||||||
self.location = value.split(",")[0] if "," in value else value
|
self.location = value.split(",")[0] if "," in value else value
|
||||||
self.publisher = value.split(",")[1] if "," in value else ""
|
self.publisher = value.split(",")[1] if "," in value else ""
|
||||||
elif key == "Standnummer":
|
elif key == "Standnummer":
|
||||||
self.signature = value
|
self.signature = value.strip()
|
||||||
elif key == "Interne Vermerke":
|
elif key == "Interne Vermerke":
|
||||||
self.internal_notes = value
|
self.internal_notes = value
|
||||||
|
|
||||||
@@ -212,7 +212,7 @@ def elsa_word_to_csv(path):
|
|||||||
data = [
|
data = [
|
||||||
row for row in df.itertuples(index=False, name=None) if row != tuples[doctype]
|
row for row in df.itertuples(index=False, name=None) if row != tuples[doctype]
|
||||||
]
|
]
|
||||||
# print(data)
|
# logger.debug(data)
|
||||||
return tuple_to_dict(data, doctype), doctype
|
return tuple_to_dict(data, doctype), doctype
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,20 @@ from dataclasses import field as dataclass_field
|
|||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from src.logic.dataclass import BookData
|
from src.logic.dataclass import BookData
|
||||||
|
import sys
|
||||||
|
from loguru import logger as log
|
||||||
|
|
||||||
|
logger = log
|
||||||
|
logger.remove()
|
||||||
|
logger.add("logs/application.log", rotation="1 week", enqueue=True)
|
||||||
|
log.add(
|
||||||
|
"logs/transformers.log",
|
||||||
|
enqueue=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||||
|
logger.add(sys.stdout)
|
||||||
|
|
||||||
|
|
||||||
###Pydatnic models
|
###Pydatnic models
|
||||||
@@ -131,7 +143,7 @@ class ARRAYData:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# # print(f"ARRAYData.transform failed, {source}, {search}")
|
# # logger.debug(f"ARRAYData.transform failed, {source}, {search}")
|
||||||
logger.exception(f"ARRAYData.transform failed, no string {search}")
|
logger.exception(f"ARRAYData.transform failed, no string {search}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
@@ -509,4 +521,4 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
ret = RDSData().transform(data)
|
ret = RDSData().transform(data)
|
||||||
data = ret.return_data("rds_availability")
|
data = ret.return_data("rds_availability")
|
||||||
# print(data)
|
# logger.debug(data)
|
||||||
|
|||||||
@@ -765,7 +765,7 @@ class Ui(Ui_Semesterapparat):
|
|||||||
# create a thread that updates the progress label after each medium
|
# create a thread that updates the progress label after each medium
|
||||||
|
|
||||||
# self.bookGrabber = None
|
# self.bookGrabber = None
|
||||||
bookGrabber = BookGrabber(self.active_apparat)
|
bookGrabber = BookGrabber()
|
||||||
bookGrabber.add_values(
|
bookGrabber.add_values(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
prof_id=prof_id,
|
prof_id=prof_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user