add appnr, new url
This commit is contained in:
@@ -14,7 +14,7 @@ class BookGrabber(QThread):
|
|||||||
updateSignal = Signal(int, int)
|
updateSignal = Signal(int, int)
|
||||||
done = Signal()
|
done = Signal()
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, appnr):
|
||||||
super(BookGrabber, self).__init__(parent=None)
|
super(BookGrabber, self).__init__(parent=None)
|
||||||
self.is_Running = True
|
self.is_Running = True
|
||||||
self.logger = MyLogger("Worker")
|
self.logger = MyLogger("Worker")
|
||||||
@@ -24,6 +24,7 @@ class BookGrabber(QThread):
|
|||||||
self.prof_id = None
|
self.prof_id = None
|
||||||
self.mode = None
|
self.mode = None
|
||||||
self.book_id = None
|
self.book_id = None
|
||||||
|
self.appnr = appnr
|
||||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||||
|
|
||||||
def add_values(self, app_id, prof_id, mode, data):
|
def add_values(self, app_id, prof_id, mode, data):
|
||||||
@@ -46,7 +47,7 @@ class BookGrabber(QThread):
|
|||||||
signature = str(entry)
|
signature = str(entry)
|
||||||
self.logger.log_info("Processing entry: " + signature)
|
self.logger.log_info("Processing entry: " + signature)
|
||||||
|
|
||||||
webdata = WebRequest().get_ppn(entry).get_data()
|
webdata = WebRequest(self.appnr).get_ppn(entry).get_data()
|
||||||
if webdata == "error":
|
if webdata == "error":
|
||||||
continue
|
continue
|
||||||
bd = BibTextTransformer(self.mode).get_data(webdata).return_data()
|
bd = BibTextTransformer(self.mode).get_data(webdata).return_data()
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class AvailChecker(QThread):
|
|||||||
count = 0
|
count = 0
|
||||||
for link in self.links:
|
for link in self.links:
|
||||||
self.logger.log_info("Processing entry: " + str(link))
|
self.logger.log_info("Processing entry: " + str(link))
|
||||||
data = WebRequest().get_ppn(link).get_data()
|
data = WebRequest(self.appnumber).get_ppn(link).get_data()
|
||||||
transformer = BibTextTransformer("RDS")
|
transformer = BibTextTransformer("RDS")
|
||||||
rds = transformer.get_data(data).return_data("rds_availability")
|
rds = transformer.get_data(data).return_data("rds_availability")
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,9 @@ logger = MyLogger(__name__)
|
|||||||
config = OmegaConf.load("config.yaml")
|
config = OmegaConf.load("config.yaml")
|
||||||
|
|
||||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||||
PPN_URL = 'https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?lookfor="{}"+&type=AllFields&limit=10&sort=py+desc%2C+title'
|
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||||
|
BASE = "https://rds.ibs-bw.de"
|
||||||
|
#
|
||||||
TITLE = "RDS_TITLE"
|
TITLE = "RDS_TITLE"
|
||||||
SIGNATURE = "RDS_SIGNATURE"
|
SIGNATURE = "RDS_SIGNATURE"
|
||||||
EDITION = "RDS_EDITION"
|
EDITION = "RDS_EDITION"
|
||||||
@@ -28,11 +30,15 @@ HEADERS = {
|
|||||||
|
|
||||||
|
|
||||||
class WebRequest:
|
class WebRequest:
|
||||||
def __init__(self) -> None:
|
def __init__(self, appnr) -> None:
    """Request data from the web, and format it depending on the mode.

    Args:
        appnr: Number of the Semesterapparat; anything ``int()`` accepts,
            e.g. ``7``, ``"7"`` or ``"07"``.

    Raises:
        ValueError: If ``appnr`` is a string that is not a valid integer.
    """
    # Normalise to a zero-padded string of at least two digits. Formatting
    # the parsed integer (instead of prefixing "0" to the raw input) keeps
    # an already padded value such as "07" from turning into "007", and
    # makes the attribute a str regardless of the input type.
    self.apparat = f"{int(appnr):02d}"
    self.signature = None
    self.ppn = None
    self.data = None
    # Timeout in seconds handed to requests.get by this instance.
    self.timeout = 5
    logger.log_info("Initialized WebRequest")
|
||||||
|
|
||||||
def get_ppn(self, signature):
|
def get_ppn(self, signature):
|
||||||
@@ -41,48 +47,52 @@ class WebRequest:
|
|||||||
signature = signature.replace("+", "%2B")
|
signature = signature.replace("+", "%2B")
|
||||||
if "doi.org" in signature:
|
if "doi.org" in signature:
|
||||||
signature = signature.split("/")[-1]
|
signature = signature.split("/")[-1]
|
||||||
url = PPN_URL.format(signature)
|
self.ppn = signature
|
||||||
page = requests.get(url)
|
|
||||||
|
|
||||||
soup = BeautifulSoup(page.content, "html.parser", from_encoding="utf-8")
|
|
||||||
if soup.find("div", class_="media") is None:
|
|
||||||
logger.log_error(f"No data found for {signature}")
|
|
||||||
return self
|
|
||||||
ppn = soup.find("div", class_="media").get("id")
|
|
||||||
self.ppn = ppn
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def get_link_data(self):
|
def search_book(self, searchterm: str):
    """Run a catalogue search for ``searchterm`` and return the page body.

    ``searchterm`` is substituted into ``PPN_URL`` as-is; the request uses
    ``self.timeout`` as its timeout.

    Returns:
        The response body as text.
    """
    search_url = PPN_URL.format(searchterm)
    page = requests.get(search_url, timeout=self.timeout)
    return page.text
|
||||||
# find div that contains daia_ in the id
|
|
||||||
# find the pre tag in that div
|
|
||||||
# return the text
|
|
||||||
# div = soup.find("div",id=lambda x: x and "daia_" in x)
|
|
||||||
# pre = div.find("pre")
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def get_data(self) -> list[str] | str:
|
def get_book_links(self, searchterm: str):
    """Return absolute URLs of all result entries for ``searchterm``.

    The search page is fetched through ``search_book`` and every
    ``<a class="title getFull">`` anchor contributes ``BASE + href``.

    Returns:
        A list of URL strings, empty when the page has no such anchors.
    """
    response = self.search_book(searchterm)
    soup = BeautifulSoup(response, "html.parser")
    # href=True skips anchors that carry the class but no target, which
    # would otherwise raise KeyError on link["href"].
    links = soup.find_all("a", class_="title getFull", href=True)
    return [BASE + link["href"] for link in links]
|
||||||
# print(page.content)
|
|
||||||
soup = BeautifulSoup(page.content, "html.parser")
|
def search(self, link: str):
    """Fetch ``link`` and return the response body as text.

    The request uses ``self.timeout`` as its timeout.
    """
    page = requests.get(link, timeout=self.timeout)
    return page.text
|
||||||
|
|
||||||
|
def get_data(self) -> list[str] | str:
    """Collect the ``<pre>`` texts of the first hit shelved in this apparat.

    Every link returned by ``get_book_links(self.ppn)`` is fetched and its
    ``RDS_LOCATION`` blocks are inspected; the first page whose location
    panel text contains ``Semesterapparat-<self.apparat>`` is used.

    Returns:
        The stripped text of every ``<pre>`` tag on the matching page (an
        empty list when that page has none), or the string ``"error"``
        when no search term is set or no page matches.
    """
    # Callers compare the result against "error", so keep that sentinel
    # instead of formatting None into the search URL.
    if self.ppn is None:
        logger.log_error("No PPN found")
        return "error"

    wanted = f"Semesterapparat-{self.apparat}"
    links = self.get_book_links(self.ppn)
    logger.log_info(f"Found links: {links}")
    for link in links:
        soup = BeautifulSoup(self.search(link), "html.parser")
        locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
        for location in locations:
            panel = location.find(
                "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
            )
            # A location block without the value panel cannot match;
            # skip it rather than fail on None.text.
            if panel is None:
                continue
            if wanted not in panel.text.strip():
                continue
            return_data = [tag.text.strip() for tag in soup.find_all("pre")]
            if not return_data:
                logger.log_error("No <pre> tag found")
            return return_data

    # No page was located in the requested apparat: report it with the
    # sentinel rather than falling through and returning None.
    logger.log_error(f"No data found for {self.ppn}")
    return "error"
|
||||||
|
|
||||||
@@ -109,7 +119,7 @@ class BibTextTransformer:
|
|||||||
self.data = None
|
self.data = None
|
||||||
# self.bookdata = BookData(**self.data)
|
# self.bookdata = BookData(**self.data)
|
||||||
|
|
||||||
def get_data(self, data: list) -> str:
|
def get_data(self, data: list):
|
||||||
RIS_IDENT = "TY -"
|
RIS_IDENT = "TY -"
|
||||||
ARRAY_IDENT = "[kid]"
|
ARRAY_IDENT = "[kid]"
|
||||||
COinS_IDENT = "ctx_ver"
|
COinS_IDENT = "ctx_ver"
|
||||||
|
|||||||
Reference in New Issue
Block a user