add files
0
src/logic/__init__.py
Normal file
59
src/logic/c_sort.py
Normal file
@@ -0,0 +1,59 @@
from typing import List, Tuple

from natsort import natsorted


def custom_sort(unsorted: List[Tuple[str, int, int]]) -> List[Tuple[str, int, int]]:
    """Sort a list of semesters given as "SoSe n" and "WiSe n/n+1" into chronological order,
    where n is the year in two-digit format.

    Args:
    ----
        unsorted (list[tuple]): List of semesters in the format "SoSe n" and "WiSe n/n+1"

    Returns:
    -------
        ret (list[tuple]): Sorted list in chronological order, "SoSe n" before "WiSe n/n+1"
    """
    # Split into summer and winter semesters and natural-sort each by its label.
    summer = natsorted([i for i in unsorted if "SoSe" in i[0]], key=lambda x: x[0])
    winter = natsorted([i for i in unsorted if "WiSe" in i[0]], key=lambda x: x[0])

    # Merge the two sorted lists. Both labels carry the year from index 5 onward
    # ("SoSe 23" -> "23", "WiSe 23/24" -> "23/24"), so the summer semester of a
    # year sorts before the winter semester starting in the same year.
    ret = []
    i = 0
    j = 0
    while i < len(summer) and j < len(winter):
        if summer[i][0][5:] <= winter[j][0][5:]:
            ret.append(summer[i])
            i += 1
        else:
            ret.append(winter[j])
            j += 1

    # Append the remaining items
    while i < len(summer):
        ret.append(summer[i])
        i += 1
    while j < len(winter):
        ret.append(winter[j])
        j += 1

    return ret


if __name__ == "__main__":
    unsorted = [
        ("WiSe 23/24", 7, 5),
        ("SoSe 23", 5, 0),
        ("SoSe 22", 1, 0),
        ("WiSe 22/23", 1, 0),
        ("SoSe 15", 1, 0),
    ]

    print(custom_sort(unsorted))
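A quick sanity check of the merge logic (a sketch, not part of the commit), assuming the two-digit year format: each year's "SoSe n" should sort before the "WiSe n/n+1" that starts in the same year.

# Hypothetical check for the example input above:
expected = [
    ("SoSe 15", 1, 0),
    ("SoSe 22", 1, 0),
    ("WiSe 22/23", 1, 0),
    ("SoSe 23", 5, 0),
    ("WiSe 23/24", 7, 5),
]
assert custom_sort(unsorted) == expected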
221
src/logic/constants.py
Normal file
@@ -0,0 +1,221 @@
APP_NRS = [i for i in range(1, 181)]

PROF_TITLES = [
    "Dr. mult.",
    "Dr. paed.",
    "Dr. rer. pol.",
    "Dr. sc. techn.",
    "Drs.",
    "Dr. agr.",
    "Dr. habil.",
    "Dr. oec.",
    "Dr. med.",
    "Dr. e. h.",
    "Dr. oec. publ.",
    "Dr. -Ing.",
    "Dr. theol.",
    "Dr. med. vet.",
    "Dr. ing.",
    "Dr. rer. nat.",
    "Dr. des.",
    "Dr. sc. mus.",
    "Dr. h. c.",
    "Dr. pharm.",
    "Dr. med. dent.",
    "Dr. phil. nat.",
    "Dr. phil.",
    "Dr. iur.",
    "Dr.",
    "Kein Titel",
]
SEMAP_MEDIA_ACCOUNT_PREFIX = "10080"
semaps = {
    "1": "0005",
    "2": "0018",
    "3": "0021",
    "4": "0034",
    "5": "0047",
    "6": "0050",
    "7": "0063",
    "8": "0076",
    "9": "0089",
    "10": "0092",
    "11": "0104",
    "12": "0117",
    "13": "0120",
    "14": "0133",
    "15": "0146",
    "16": "0159",
    "17": "0162",
    "18": "0175",
    "19": "0188",
    "20": "0191",
    "21": "0203",
    "22": "0216",
    "23": "0229",
    "24": "0232",
    "25": "0245",
    "26": "0258",
    "27": "0261",
    "28": "0274",
    "29": "0287",
    "30": "0290",
    "31": "0302",
    "32": "0315",
    "33": "0328",
    "34": "0331",
    "35": "0344",
    "36": "0357",
    "37": "0360",
    "38": "0373",
    "39": "0386",
    "40": "0399",
    "41": "0401",
    "42": "0414",
    "43": "0427",
    "44": "0430",
    "45": "0443",
    "46": "0456",
    "47": "0469",
    "48": "0472",
    "49": "0485",
    "50": "0498",
    "51": "0500",
    "52": "0513",
    "53": "0526",
    "54": "0539",
    "55": "0542",
    "56": "0555",
    "57": "0568",
    "58": "0571",
    "59": "0584",
    "60": "0597",
    "61": "0609",
    "62": "0612",
    "63": "0625",
    "64": "0638",
    "65": "0641",
    "66": "0654",
    "67": "0667",
    "68": "0670",
    "69": "0683",
    "70": "0696",
    "71": "0708",
    "72": "0711",
    "73": "0724",
    "74": "0737",
    "75": "0740",
    "76": "0753",
    "77": "0766",
    "78": "0779",
    "79": "0782",
    "80": "0795",
    "81": "0807",
    "82": "0810",
    "83": "0823",
    "84": "0836",
    "85": "0849",
    "86": "0852",
    "87": "0865",
    "88": "0878",
    "89": "0881",
    "90": "0894",
    "91": "0906",
    "92": "0919",
    "93": "0922",
    "94": "0935",
    "95": "0948",
    "96": "0951",
    "97": "0964",
    "98": "0977",
    "99": "0980",
    "100": "0993",
    "101": "1002",
    "102": "1015",
    "103": "1028",
    "104": "1031",
    "105": "1044",
    "106": "1057",
    "107": "1060",
    "108": "1073",
    "109": "1086",
    "110": "1099",
    "111": "1101",
    "112": "1114",
    "113": "1127",
    "114": "1130",
    "115": "1143",
    "116": "1156",
    "117": "1169",
    "118": "1172",
    "119": "1185",
    "120": "1198",
    "121": "1200",
    "122": "1213",
    "123": "1226",
    "124": "1239",
    "125": "1242",
    "126": "1255",
    "127": "1268",
    "128": "1271",
    "129": "1284",
    "130": "1297",
    "131": "1309",
    "132": "1312",
    "133": "1325",
    "134": "1338",
    "135": "1341",
    "136": "1354",
    "137": "1367",
    "138": "1370",
    "139": "1383",
    "140": "1396",
    "141": "1408",
    "142": "1411",
    "143": "1424",
    "144": "1437",
    "145": "1440",
    "146": "1453",
    "147": "1466",
    "148": "1479",
    "149": "1482",
    "150": "1495",
    "151": "1507",
    "152": "1510",
    "153": "1523",
    "154": "1536",
    "155": "1549",
    "156": "1552",
    "157": "1565",
    "158": "1578",
    "159": "1581",
    "160": "1594",
    "161": "1606",
    "162": "1619",
    "163": "1622",
    "164": "1635",
    "165": "1648",
    "166": "1651",
    "167": "1664",
    "168": "1677",
    "169": "1680",
    "170": "1693",
    "171": "1705",
    "172": "1718",
    "173": "1721",
    "174": "1734",
    "175": "1747",
    "176": "1750",
    "177": "1763",
    "178": "1776",
    "179": "1789",
    "180": "1792",
}
# Build the full media account numbers: prepend the library prefix and
# duplicate the final digit of each base number.
for key, value in semaps.items():
    semaps[key] = f"{SEMAP_MEDIA_ACCOUNT_PREFIX}{value}{value[-1]}"
SEMAP_MEDIA_ACCOUNTS = semaps

# for s in SEMAP_MEDIA_ACCOUNTS:
#     assert len(SEMAP_MEDIA_ACCOUNTS[s]) == 10, f"semap {s} has wrong length"
#     print(f"{SEMAP_MEDIA_ACCOUNTS[s]}")
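A runnable version of the length check left commented out above; the 5-digit prefix plus the 4-digit base plus the duplicated final digit should always yield 10 characters:

# Sanity check for the generated account numbers.
for nr, account in SEMAP_MEDIA_ACCOUNTS.items():
    assert len(account) == 10, f"semap {nr} has wrong length: {account}"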
27
src/logic/csvparser.py
Normal file
@@ -0,0 +1,27 @@
import csv


def csv_to_list(path: str) -> list[str]:
    """Extract the data from a CSV file and return its first column as a list of strings."""
    with open(path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        data = []
        for row in reader:
            # Strip stray quote characters from every cell.
            for i in range(len(row)):
                row[i] = row[i].replace('"', "")
            data.append(row)
        # Keep only the first column of each row.
        ret = []
        for i in data:
            ret.append(i[0])
        return ret


if __name__ == "__main__":
    text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
    print(text)
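For reference, a hypothetical input illustrating the layout the parser expects: semicolon-delimited rows whose first column carries the signature.

# Hypothetical contents of 71.csv:
#   "ZE 77000 W492";"extra column"
#   "ST 250 U42 (15)";"extra column"
# csv_to_list strips the stray quotes and returns:
#   ['ZE 77000 W492', 'ST 250 U42 (15)']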
76
src/logic/dataclass.py
Normal file
@@ -0,0 +1,76 @@
import re
from dataclasses import dataclass, field


@dataclass
class ApparatData:
    prof_title: str | None = None
    profname: str | None = None
    dauerapp: bool = False
    appnr: int | None = None
    appname: str | None = None
    app_fach: str | None = None
    semester: str | None = None
    erstellsemester: str | None = None
    prof_mail: str | None = None
    prof_tel: int | None = None
    deleted: int = 0
    prof_adis_id: int | None = None
    apparat_adis_id: int | None = None

    def get_prof_details(self) -> dict:
        return {
            "prof_title": self.prof_title,
            "profname": self.profname,
            "prof_mail": self.prof_mail,
            "prof_tel": self.prof_tel,
            "fullname": self.profname,
        }


@dataclass
class BookData:
    ppn: str | None = None
    title: str | None = None
    signature: str | None = None
    edition: str | None = None
    link: str | None = None
    isbn: str | list | None = field(default_factory=list)
    author: str | None = None
    language: str | list | None = field(default_factory=list)
    publisher: str | None = None
    year: str | None = None
    pages: str | None = None
    # availability: dict | None = field(default_factory=dict)

    def from_dict(self, data: dict):
        for key, value in data.items():
            setattr(self, key, value)

    def to_dict(self):
        return self.__dict__

    def from_dataclass(self, dataclass):
        for key, value in dataclass.__dict__.items():
            setattr(self, key, value)

    def from_string(self, data: str):
        if not data.startswith("BookData"):
            raise ValueError("No valid BookData string")
        # Parse key='value' pairs out of a repr-style string.
        pattern = r"(\w+)='([^']*)'"
        data_dict = dict(re.findall(pattern, data))
        for key, value in data_dict.items():
            setattr(self, key, value)
        return self


@dataclass
class MailData:
    subject: str | None = None
    body: str | None = None
    mailto: str | None = None
    prof: str | None = None
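A short usage sketch of the repr-style parsing that from_string supports; the sample string is hypothetical:

# Hypothetical example; any string of key='value' pairs prefixed with
# "BookData" works.
s = "BookData(ppn='123456789', title='Example Title', year='2020')"
book = BookData().from_string(s)
print(book.title)  # -> Example Title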
45
src/logic/fileparser.py
Normal file
@@ -0,0 +1,45 @@
import csv

import pandas as pd
from docx import Document


def csv_to_list(path: str) -> list[str]:
    """Extract the data from a CSV file and return its first column as a list of strings."""
    with open(path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        data = []
        for row in reader:
            for i in range(len(row)):
                row[i] = row[i].replace('"', "")
            data.append(row)
        ret = []
        for i in data:
            ret.append(i[0])
        return ret


def word_docx_to_csv(path) -> pd.DataFrame:
    doc = Document(path)
    tables = doc.tables

    m_data = []
    for table in tables:
        data = []
        for row in table.rows:
            row_data = []
            for cell in row.cells:
                text = cell.text
                text = text.replace("\n", "")
                row_data.append(text)
            data.append(row_data)
        # Promote the first row to the header and drop it from the body.
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]
        df = df.iloc[1:]

        m_data.append(df)

    # Only the third table in the document is of interest here.
    df = m_data[2]
    return df
31
src/logic/get_msword_content.py
Normal file
@@ -0,0 +1,31 @@
from docx import Document

# Exploratory script: print the first cells of every table column in the
# registration form.
wordDoc = Document("files/Semesterapparat - Anmeldung.docx")
tables = wordDoc.tables
for table in tables:
    for column in table.columns:
        cellcount = 0
        for cell in column.cells:
            # Only print the first 12 cells of each column.
            if cellcount < 12:
                cellcount += 1
                print(f"cell:{cell.text}")

# Alternative: iterate row-wise instead of column-wise.
# for table in wordDoc.tables:
#     for row in table.rows:
#         print("---")
#         for cell in row.cells:
#             print(f"cell:{cell.text}")
11
src/logic/get_pdf_content.py
Normal file
@@ -0,0 +1,11 @@
import tabula

file = "files/Semesterapparat - Anmeldung.pdf"


def extract_book_data(file):
    tables = tabula.read_pdf(file, pages="all", encoding="utf-8", multiple_tables=True)
    # Dump the PDF tables to a CSV file alongside the source document.
    tabula.convert_into(file, file.replace(".pdf", ".csv"), output_format="csv", pages="all")
    with open("files/Semesterapparat - Anmeldung.csv", "r") as f:
        content = f.read()
    return content
0
src/logic/mail.py
Normal file
30
src/logic/pdfparser.py
Normal file
@@ -0,0 +1,30 @@
from pdfquery import PDFQuery


def pdf_to_csv(path: str) -> str:
    """Extract all text from a PDF file and return it as a single string."""
    file = PDFQuery(path)
    file.load()
    # Get the text from the pdf file.
    text_elems = file.extract([
        ("with_formatter", "text"),
        ("all_text", "*"),
    ])
    extracted_text = text_elems["all_text"]

    return extracted_text


if __name__ == "__main__":
    text = pdf_to_csv("54_pdf.pdf")
    # Remove linebreaks.
    text = text.replace("\n", "")
    print(text)
20
src/logic/settings.py
Normal file
@@ -0,0 +1,20 @@
from dataclasses import dataclass, field

import yaml


@dataclass
class Settings:
    """Settings for the app."""

    save_path: str
    database_name: str
    database_path: str
    default_apps: bool = True
    custom_applications: list[dict] = field(default_factory=list)

    def save_settings(self):
        """Save the settings to the config file."""
        with open("config.yaml", "w") as f:
            yaml.dump(self.__dict__, f)


# Open the config file and load the settings.
with open("config.yaml", "r") as f:
    data = yaml.safe_load(f)
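The loaded dict can then be hydrated into a Settings instance; a minimal sketch, assuming config.yaml carries exactly the field names of the dataclass:

# Sketch, assuming the YAML keys match the dataclass fields.
settings = Settings(**data)
settings.save_settings()  # round-trips the values back to config.yaml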
195
src/logic/threads.py
Normal file
@@ -0,0 +1,195 @@
import sqlite3
import threading
import time

from PyQt6.QtCore import QThread, pyqtSignal

from log import MyLogger
from src.backend.database import Database
from src.logic.webrequest import BibTextTransformer, WebRequest


class BookGrabber(QThread):
    updateSignal = pyqtSignal(int, int)

    def __init__(
        self,
        mode: str = None,
        data: list = None,
        app_id: int = None,
        prof_id: int = None,
        parent=None,
    ):
        super().__init__(parent)
        self.logger = MyLogger("Worker")
        self.logger.log_info("Starting worker thread")
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.book_id = None
        self.db_lock = threading.Lock()

    def run(self):
        self.db = Database()
        item = 0
        for entry in self.data:
            signature = str(entry)
            self.logger.log_info("Processing entry: " + signature)

            webdata = WebRequest().get_ppn(entry).get_data()
            if webdata == "error":
                continue
            bd = BibTextTransformer(self.mode).get_data(webdata).return_data()
            transformer = BibTextTransformer("RDS")
            rds = transformer.get_data(webdata).return_data("rds_availability")
            bd.signature = entry
            with self.db_lock:
                # Confirm the lock is acquired before writing.
                print("lock acquired, adding book to database")
                self.db.add_medium(bd, self.app_id, self.prof_id)
                # Get the latest book id.
                self.book_id = self.db.get_latest_book_id()
                self.logger.log_info("Added book to database")
            state = 0
            for rds_item in rds.items:
                sign = rds_item.superlocation
                loc = rds_item.location
                # The apparat number may appear in either location string.
                if str(self.app_id) in sign or str(self.app_id) in loc:
                    state = 1
                self.logger.log_info(f"State of {signature}: {state}")
                with self.db_lock:
                    print(
                        "lock acquired, updating availability of "
                        + str(self.book_id)
                        + " to "
                        + str(state)
                    )
                    try:
                        self.db.set_availability(self.book_id, state)
                    except sqlite3.OperationalError as e:
                        self.logger.log_error(f"Failed to update availability: {e}")
                # Only the first availability item is evaluated.
                break

            item += 1
            self.updateSignal.emit(item, len(self.data))
        self.logger.log_info("Worker thread finished")
        # Terminate the thread.
        self.quit()


class AvailChecker(QThread):
    updateSignal = pyqtSignal(str, int)

    def __init__(
        self, links: list = None, appnumber: int = None, parent=None, books: list[dict] = None
    ):
        super().__init__(parent)
        if links is None:
            links = []
        if books is None:
            books = []
        self.logger = MyLogger("AvailChecker")
        self.logger.log_info("Starting worker thread")
        self.logger.log_info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books
        self.db_lock = threading.Lock()

    def run(self):
        self.db = Database()

        for link in self.links:
            state = 0
            self.logger.log_info("Processing entry: " + str(link))
            data = WebRequest().get_ppn(link).get_data()
            transformer = BibTextTransformer("RDS")
            rds = transformer.get_data(data).return_data("rds_availability")
            print(rds)
            for item in rds.items:
                sign = item.superlocation
                loc = item.location
                if str(self.appnumber) in sign or str(self.appnumber) in loc:
                    state = 1
                # Look up the database id of the book behind this link.
                book_id = None
                for book in self.books:
                    if book["bookdata"].signature == link:
                        book_id = book["id"]
                        break
                self.logger.log_info(f"State of {link}: " + str(state))
                with self.db_lock:
                    print(
                        "lock acquired, updating availability of "
                        + str(book_id)
                        + " to "
                        + str(state)
                    )
                    self.db.set_availability(book_id, state)
                break
            self.updateSignal.emit(item.callnumber, state)

        self.logger.log_info("Worker thread finished")
        # Terminate the thread.
        self.quit()


class AutoAdder(QThread):
    updateSignal = pyqtSignal(int)
    setTextSignal = pyqtSignal(str)  # carries the current signature
    progress = pyqtSignal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        super().__init__(parent)
        self.logger = MyLogger("AutoAdder")
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

        print("Launched AutoAdder")
        print(self.data, self.app_id, self.prof_id)

    def run(self):
        self.db = Database()
        # Gather data in the background and dynamically update the dialog's
        # progress bar and list widget via signals.
        self.logger.log_info("Starting worker thread")
        item = 0
        for entry in self.data:
            try:
                # webdata = WebRequest().get_ppn(entry).get_data()
                # bd = BibTextTransformer("ARRAY").get_data(webdata).return_data()
                # bd.signature = entry
                self.updateSignal.emit(item)
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                print(item, len(self.data))
                time.sleep(1)

            except Exception as e:
                print(e)
                self.logger.log_exception(
                    f"The query failed with message {e} for signature {entry}"
                )
                continue
        if item == len(self.data):
            self.logger.log_info("Worker thread finished")
            # Terminate the thread.
            self.finished.emit()
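A minimal sketch of how a caller might drive one of these workers; the signature list and the progress_bar widget are assumptions, not part of the commit:

# Hypothetical wiring from the GUI side.
grabber = BookGrabber(mode="BibTeX", data=["ZE 77000 W492"], app_id=54, prof_id=1)
grabber.updateSignal.connect(
    lambda done, total: progress_bar.setValue(int(done / total * 100))
)
grabber.start()  # run() executes in the worker thread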
1974
src/logic/userInterface.py
Normal file
File diff suppressed because it is too large
176
src/logic/webrequest.py
Normal file
@@ -0,0 +1,176 @@
import requests
from bs4 import BeautifulSoup
from omegaconf import OmegaConf

from log import MyLogger
from src.logic.dataclass import BookData
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData

logger = MyLogger(__name__)
config = OmegaConf.load("config.yaml")

API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = 'https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?lookfor="{}"+&type=AllFields&limit=10&sort=py+desc%2C+title'
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
ISBN = "RDS_ISBN"
AUTHOR = "RDS_PERSON"

HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
        (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Accept-Language": "en-US, en;q=0.5",
}


class WebRequest:
    def __init__(self) -> None:
        """Request data from the web and format it depending on the mode."""
        self.signature = None
        self.ppn = None
        self.data = None
        logger.log_info("Initialized WebRequest")

    def get_ppn(self, signature):
        self.signature = signature
        # URL-encode "+" and reduce DOI links to their suffix before searching.
        if "+" in signature:
            signature = signature.replace("+", "%2B")
        if "doi.org" in signature:
            signature = signature.split("/")[-1]
        url = PPN_URL.format(signature)
        page = requests.get(url)

        soup = BeautifulSoup(page.content, "html.parser", from_encoding="utf-8")
        if soup.find("div", class_="media") is None:
            logger.log_error(f"No data found for {signature}")
            return self
        ppn = soup.find("div", class_="media").get("id")
        self.ppn = ppn
        return self

    def get_link_data(self):
        page = requests.get(PPN_URL.format(self.ppn))
        soup = BeautifulSoup(page.content, "html.parser")
        # Find the div whose id contains "daia_", locate its pre tag and
        # return the text:
        # div = soup.find("div", id=lambda x: x and "daia_" in x)
        # pre = div.find("pre")
        return soup

    def get_data(self) -> list[str] | str:
        if self.ppn is None:
            logger.log_error("No PPN found")
            return "error"
        page = requests.get(API_URL.format(self.ppn))
        logger.log_info(f"Requesting data from {API_URL.format(self.ppn)}")
        logger.log_info(f"Status code: {page.status_code}")
        soup = BeautifulSoup(page.content, "html.parser")
        pre_tag = soup.find_all("pre")
        return_data = []

        if pre_tag:
            for tag in pre_tag:
                data = tag.text.strip()
                return_data.append(data)
            return return_data
        else:
            logger.log_error("No <pre> tag found")
            return return_data


class BibTextTransformer:
    def __init__(self, mode: str) -> None:
        self.mode = mode
        self.field = None
        self.data = None

    def get_data(self, data: list) -> "BibTextTransformer":
        # Each export format is identified by a marker string in its line.
        RIS_IDENT = "TY -"
        ARRAY_IDENT = "[kid]"
        COinS_IDENT = "ctx_ver"
        BIBTEX_IDENT = "@book"
        RDS_IDENT = "RDS ---------------------------------- "
        if self.mode == "RIS":
            for line in data:
                if RIS_IDENT in line:
                    self.data = line
        elif self.mode == "ARRAY":
            for line in data:
                if ARRAY_IDENT in line:
                    self.data = line
        elif self.mode == "COinS":
            for line in data:
                if COinS_IDENT in line:
                    self.data = line
        elif self.mode == "BibTeX":
            for line in data:
                if BIBTEX_IDENT in line:
                    self.data = line
        elif self.mode == "RDS":
            for line in data:
                if RDS_IDENT in line:
                    self.data = line
        return self

    def return_data(self, option=None) -> BookData:
        """Return data to the caller.

        Args:
            option (string, optional): Option for RDS, as there are two file
                types. Use "rds_availability" or "rds_data"; anything else
                gives a dict of both responses. Defaults to None.

        Returns:
            BookData: the transformed record.
        """
        if self.mode == "ARRAY":
            return ARRAYData().transform(self.data)
        elif self.mode == "COinS":
            return COinSData().transform(self.data)
        elif self.mode == "BibTeX":
            return BibTeXData().transform(self.data)
        elif self.mode == "RIS":
            return RISData().transform(self.data)
        elif self.mode == "RDS":
            return RDSData().transform(self.data).return_data(option)


def cover(isbn):
    test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    print(test_url)
    data = requests.get(test_url, stream=True)
    return data.content


def get_content(soup, css_class):
    return soup.find("div", class_=css_class).text.strip()


if __name__ == "__main__":
    print("main")
    link = "ZE 77000 W492"
    data = WebRequest().get_ppn(link).get_data()

    print(data)
    # txt = (
    #     BibTextTransformer("RIS")
    #     .get_data(WebRequest().get_ppn("ST 250 U42 (15)").get_data())
    #     .return_data()
    # )
    # print(txt)
26
src/logic/wordparser.py
Normal file
@@ -0,0 +1,26 @@
import pandas as pd
from docx import Document


def word_docx_to_csv(path) -> pd.DataFrame:
    doc = Document(path)
    tables = doc.tables

    m_data = []
    for table in tables:
        data = []
        for row in table.rows:
            row_data = []
            for cell in row.cells:
                text = cell.text
                text = text.replace("\n", "")
                row_data.append(text)
            data.append(row_data)
        # Promote the first row to the header and drop it from the body.
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]
        df = df.iloc[1:]

        m_data.append(df)

    # Only the third table in the document is of interest here.
    df = m_data[2]
    return df
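A usage sketch; the path comes from the other scripts in this commit, and the function assumes the registration form carries at least three tables, of which the third is returned:

# Hypothetical call:
df = word_docx_to_csv("files/Semesterapparat - Anmeldung.docx")
print(df.head())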