update code, add all features

This commit is contained in:
2025-03-12 11:35:13 +01:00
parent f29167b404
commit 4dd0710429
11 changed files with 2267 additions and 0 deletions

4
main.py Normal file
View File

@@ -0,0 +1,4 @@
from src.ui.interface import launch
# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    launch()

28
pyproject.toml Normal file
View File

@@ -0,0 +1,28 @@
[project]
name = "linkavailablechecker"
version = "0.1.0"
description = "Desktop tool that checks whether stored links are still reachable and records their HTTP status codes"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"beautifulsoup4>=4.12.3",
"loguru>=0.7.3",
"playwright>=1.49.1",
"pyqt6-charts>=6.8.0",
"pyqt6>=6.8.1",
"pyqtgraph>=0.13.7",
"ratelimit>=2.2.1",
"requests>=2.32.3",
"prettytable>=3.14.0",
"cloudscraper>=1.2.71",
"limit>=0.2.3",
"alive-progress>=3.2.0",
"tqdm>=4.67.1",
]
[dependency-groups]
dev = [
"pip>=25.0",
"selenium>=4.29.0",
"undetected-chromedriver>=3.5.5",
]

1
src/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .ui.interface import MainWindow

169
src/database.py Normal file
View File

@@ -0,0 +1,169 @@
import sqlite3
class Database:
    """SQLite-backed store for link check results.

    Each row of the ``response`` table holds one link (``source_link``)
    belonging to a ``ppn``, the link it resolved to (``destination_link``)
    and the response code recorded for it.  ``response_code`` defaults to
    ``0``; :meth:`get_links` and :meth:`get_checked_by_publisher` treat ``0``
    as "not checked yet".

    A "publisher" in this class is the host part of a ``source_link`` (see
    :meth:`get_publishers`); all per-publisher queries match it as a
    substring of ``source_link`` via ``LIKE '%publisher%'``.
    """

    def __init__(self, db_name):
        """Open (or create) the database *db_name* and ensure the table exists."""
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        self.create_table()

    @staticmethod
    def _like(publisher):
        """Return the LIKE pattern matching *publisher* anywhere in a link."""
        return f"%{publisher}%"

    def create_table(self):
        """Create the ``response`` table if it does not exist yet."""
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS response (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ppn TEXT NOT NULL,
                source_link TEXT ,
                destination_link TEXT,
                response_code INTEGER NOT NULL DEFAULT 0
            )
        """)
        self.conn.commit()

    def add_data(self, ppn, source_link, destination_link=None, response_code=0):
        """Insert one link row and commit."""
        self.cursor.execute(
            """
            INSERT INTO response (ppn, source_link, destination_link, response_code)
            VALUES (?, ?, ?, ?)
            """,
            (ppn, source_link, destination_link, response_code),
        )
        self.conn.commit()

    def get_data(self, query, args):
        """Run an arbitrary parameterized *query* and return all rows."""
        self.cursor.execute(query, args)
        return self.cursor.fetchall()

    def get_links_by_response_code(self, response_code: int):
        """Return ``(id, source_link)`` rows having *response_code*."""
        self.cursor.execute(
            "SELECT id, source_link FROM response WHERE response_code = ?",
            (response_code,),
        )
        return self.cursor.fetchall()

    def get_links(self):
        """Return ``(id, source_link)`` rows whose response code is still 0."""
        self.cursor.execute(
            "SELECT id, source_link FROM response WHERE response_code = 0"
        )
        return self.cursor.fetchall()

    def update_response_code(self, id, response_code, destination_link):
        """Store the check result for the row with primary key *id* and commit."""
        self.cursor.execute(
            "UPDATE response SET response_code = ?, destination_link = ? WHERE id = ?",
            (response_code, destination_link, id),
        )
        self.conn.commit()

    def reset_status_codes(self):
        """Reset every row to response code 0 and clear its destination link."""
        self.cursor.execute(
            "UPDATE response SET response_code = 0, destination_link = NULL"
        )
        self.conn.commit()

    def get_status_codes(self):
        """Return the distinct response codes as a sorted list of strings.

        The codes are sorted as strings (lexicographically), not numerically.
        """
        self.cursor.execute("SELECT DISTINCT response_code FROM response")
        return sorted(str(row[0]) for row in self.cursor.fetchall())

    def close(self):
        """Close the underlying connection."""
        self.conn.close()

    def get_status_code_counts(self):
        """Return ``(response_code, count)`` tuples ordered by response code."""
        self.cursor.execute(
            "SELECT response_code, COUNT(*) FROM response "
            "GROUP BY response_code ORDER BY response_code"
        )
        return [(row[0], row[1]) for row in self.cursor.fetchall()]

    def get_publishers(self):
        """Return the unique publishers found in ``source_link`` (unordered).

        For links that contain both ``http`` and ``//`` the publisher is the
        part between ``//`` and the next ``/``; any other value is used as-is.
        Rows without a source link (NULL) are skipped, because the column is
        nullable and a NULL would otherwise raise ``TypeError`` here.
        """
        self.cursor.execute("SELECT source_link FROM response")
        publishers = set()
        for (link,) in self.cursor.fetchall():
            if link is None:
                continue
            if "http" in link and "//" in link:
                publishers.add(link.split("//")[1].split("/")[0])
            else:
                publishers.add(link)
        return list(publishers)

    def get_results_by_publisher(self, publisher, distinct=False):
        """Return ``(destination_link, response_code, source_link)`` rows.

        With ``distinct=True`` identical rows are collapsed into one.  (The
        flag used to be inverted: ``distinct=True`` ran the plain query.)
        """
        select = "SELECT DISTINCT" if distinct else "SELECT"
        self.cursor.execute(
            f"{select} destination_link, response_code, source_link "
            "FROM response WHERE source_link LIKE ?",
            (self._like(publisher),),
        )
        return self.cursor.fetchall()

    def get_results_by_publisher_and_status(
        self, publisher, status_code, distinct=False
    ):
        """Return ``(ppn, destination_link, source_link)`` tuples for one
        publisher and response code.

        With ``distinct=True`` identical rows are collapsed into one.  (The
        DISTINCT result used to be overwritten by the plain query.)
        """
        select = "SELECT DISTINCT" if distinct else "SELECT"
        self.cursor.execute(
            f"{select} ppn, destination_link, source_link "
            "FROM response WHERE source_link LIKE ? AND response_code = ?",
            (self._like(publisher), status_code),
        )
        return [(row[0], row[1], row[2]) for row in self.cursor.fetchall()]

    def get_publisher_count(self, publisher):
        """Return the number of rows whose source link contains *publisher*."""
        self.cursor.execute(
            "SELECT COUNT(*) FROM response WHERE source_link LIKE ?",
            (self._like(publisher),),
        )
        return self.cursor.fetchone()[0]

    def get_checked_by_publisher(self, publisher):
        """Return the number of rows of *publisher* with a non-zero code."""
        self.cursor.execute(
            "SELECT COUNT(*) FROM response WHERE source_link LIKE ? AND response_code != 0",
            (self._like(publisher),),
        )
        return self.cursor.fetchone()[0]

    def get_num_of_links_for_status_code(self, statuscode):
        """Return ``(publisher, count)`` for every publisher and *statuscode*.

        Publishers with a count of 0 are included.  One query is issued per
        publisher.
        """
        return [
            (
                publisher,
                self.get_num_of_links_for_status_code_and_publisher(
                    publisher, statuscode
                )[0],
            )
            for publisher in self.get_publishers()
        ]

    def get_num_of_links_for_status_code_and_publisher(self, publisher, statuscode):
        """Return the matching row count as the raw one-element row ``(count,)``.

        Unlike the other counters this returns the row tuple, not the bare
        integer; that shape is kept for existing callers.
        """
        self.cursor.execute(
            "SELECT COUNT(*) FROM response WHERE source_link LIKE ? AND response_code = ?",
            (self._like(publisher), statuscode),
        )
        return self.cursor.fetchone()

    def get_unique_count_ppns(self):
        """Return the number of distinct ``ppn`` values."""
        self.cursor.execute("SELECT COUNT(DISTINCT ppn) FROM response")
        return self.cursor.fetchone()[0]

    def get_link_count(self):
        """Return the total number of rows."""
        self.cursor.execute("SELECT COUNT(*) FROM response")
        return self.cursor.fetchone()[0]
# Manual smoke test: open the local "lfer.db" and print the number of
# distinct PPNs followed by the total number of stored links.
if __name__ == "__main__":
    db = Database("lfer.db")
    print(db.get_unique_count_ppns(), db.get_link_count())

446
src/ui/interface.py Normal file
View File

@@ -0,0 +1,446 @@
import os
import time
from PyQt6 import QtWidgets, QtCore
from PyQt6.QtCore import pyqtSlot
from PyQt6.QtWidgets import QVBoxLayout
from PyQt6.QtCharts import QChart, QChartView, QPieSeries, QPieSlice
from PyQt6.QtGui import QPainter
from src.ui.threads import (
CheckThread,
WebscraperThread,
StatusCodeThread,
CheckThreadPlaywright,
)
from .sources.Ui_main_interface import Ui_MainWindow
from src.database import Database
import prettytable
import loguru
import sys
from src.ui.utils import QtqdmProgressBar
# Module-wide logger: take loguru's shared logger, drop the handler(s) that are
# already registered and log messages of level INFO and above to stdout.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window of the link checker.

    Combines the generated ``Ui_MainWindow`` layout with the application
    logic: loading PPN files, (re-)checking links by status code in worker
    threads, showing statistics as pie chart and table, and exporting
    results as ``export.tsv``.
    """

    def __init__(self):
        """Build the UI, open ``lfer.db`` and connect all signals."""
        super().__init__()
        self.setupUi(self)
        self.showResults.clicked.connect(self.create_graph)
        # Start on the first page of the stacked "summary" widget.
        self.summary.setCurrentIndex(0)
        self.spinTimeout.setButtonSymbols(
            QtWidgets.QAbstractSpinBox.ButtonSymbols.PlusMinus
        )
        self.spinTimer.setButtonSymbols(
            QtWidgets.QAbstractSpinBox.ButtonSymbols.PlusMinus
        )

        self.db = Database("lfer.db")
        self.db_publishers = self.db.get_publishers()
        self.db_publishers.sort()
        self.publishers.addItems(self.db_publishers)
        self.response_data.itemDoubleClicked.connect(self.display_detailed_overview)
        self.status_codes = self.db.get_status_codes()

        # Progress bar for running checks; it lives in the placeholder widget
        # of the generated layout and stays hidden until a check is started.
        self.progressBar_qtdm = QtqdmProgressBar(self)
        self.progressBar_qtdm.setStyle(QtWidgets.QStyleFactory.create("Fusion"))
        pb_font = self.progressBar_qtdm.font()
        pb_font.setBold(False)
        self.progressBar_qtdm.setFont(pb_font)
        self.widget.setLayout(QVBoxLayout())
        self.widget.layout().addWidget(self.progressBar_qtdm)
        self.progressBar_qtdm.hide()

        # Status code check.
        self.check_status_code.addItems(self.status_codes)
        self.check_code.clicked.connect(self.check_status_code_clicked)
        self.total = 0
        self.total_scraper = 0
        self.check_overview.hide()
        self.statusMessage = QtWidgets.QLabel()
        self.webscraperMessage = QtWidgets.QLabel()
        self.statusBar().addPermanentWidget(self.statusMessage)
        self.statusBar().addPermanentWidget(self.webscraperMessage)

        # Database overview.  Worker threads are stored in ``checkThreads`` so
        # a Python reference to each started thread is retained.
        self.checkThreads = []
        self.update_database_stats()
        self.updatedatabasestats.clicked.connect(self.update_database_stats)

        # Database actions.
        self.load_ppn.clicked.connect(self.load_ppns)
        self.resetStatusCodes.clicked.connect(self.reset_status_codes)

        # Per-publisher graph for one status code (double click on a row).
        self.stats_scroll_area_table.itemDoubleClicked.connect(self.show_graph)
        self.graph_data = []
        self.graph_data_total = 0
        self.status_code = None
        self.stats_scroll_area_table.setColumnCount(3)
        # Give the third column an initial width of 50px, then let it take
        # the remaining space of the table.
        self.stats_scroll_area_table.setColumnWidth(2, 50)
        self.stats_scroll_area_table.horizontalHeader().setSectionResizeMode(
            2, QtWidgets.QHeaderView.ResizeMode.Stretch
        )

        # Export.  "Export all" needs a selected status code, see show_graph.
        self.exportBtn.clicked.connect(self.export_data_single)
        self.exportAll.clicked.connect(self.export_data_all)
        self.exportAll.setEnabled(False)

        self.usePlaywright.clicked.connect(self.toggle_playwright)

    @staticmethod
    def _fill_table(table, rows, column_count):
        """Replace the content of *table* with *rows*.

        The first *column_count* fields of every row are written as text.
        Sorting is switched off while the table is filled: with sorting
        enabled a row may move as soon as its first cell is set, so the
        remaining cells would end up in a different row.
        """
        sorting_enabled = table.isSortingEnabled()
        table.setSortingEnabled(False)
        table.setRowCount(0)
        for values in rows:
            row = table.rowCount()
            table.insertRow(row)
            for column in range(column_count):
                table.setItem(
                    row, column, QtWidgets.QTableWidgetItem(str(values[column]))
                )
        table.setSortingEnabled(sorting_enabled)

    def _write_export(self, rows):
        """Write ``(ppn, link)`` *rows* to ``export.tsv`` and open the file."""
        table = prettytable.PrettyTable()
        table.field_names = ["PPN", "Link"]
        for ppn, link in rows:
            table.add_row([ppn, link])
        # Let the CSV writer emit tabs directly.  Replacing "," by a tab in
        # finished CSV text would also rewrite commas inside links and break
        # the quoting.  newline="" keeps the writer's own line endings intact.
        with open("export.tsv", "w", encoding="utf-8", newline="") as f:
            f.write(table.get_csv_string(delimiter="\t"))
        # NOTE: "start" is a Windows shell command; on other platforms this
        # call does not open the file.
        os.system("start export.tsv")

    def toggle_playwright(self):
        """Disable the timer settings while the browser based check is selected."""
        self.groupBox_2.setEnabled(not self.usePlaywright.isChecked())

    def export_data_single(self):
        """Export the rows currently shown in the detailed overview."""
        self.export_data(self.detailed_overview)

    def export_data_all(self):
        """Export PPN and source link of every entry with the selected status code."""
        rows = self.db.get_data(
            "SELECT ppn, source_link FROM response WHERE response_code = ?",
            (self.status_code,),
        )
        self._write_export(rows)

    def export_data(self, tableData):
        """Export the first two columns of the table widget *tableData*.

        The parameter is used directly; the former ``self.tableData`` lookup
        referred to an attribute that does not exist.
        """
        rows = [
            (tableData.item(row, 0).text(), tableData.item(row, 1).text())
            for row in range(tableData.rowCount())
        ]
        self._write_export(rows)

    def show_graph(self, pos):
        """Collect the per-publisher data for the double-clicked status code.

        *pos* is the double-clicked item of the statistics table; the status
        code is read from the first column of its row.
        """
        log.debug("show graph")
        self.exportAll.setEnabled(True)
        status_code = self.stats_scroll_area_table.item(pos.row(), 0).text()
        self.status_code = status_code
        # Start from an empty result list, otherwise the data of a previously
        # selected status code would be mixed into the new chart.
        self.graph_data = []
        scthread = StatusCodeThread(status_code=status_code)
        scthread.progressSignal.connect(self.update_progress_graph)
        scthread.total_entries_signal.connect(self.set_total_entries_graph)
        scthread.current_data_signal.connect(self.add_data)
        scthread.start()
        self.checkThreads.append(scthread)

    def add_data(self, data):
        """Store one graph entry unless its value (``data[1]``) is 0."""
        if data[1] != 0:
            self.graph_data.append(data)

    def set_total_entries_graph(self, total):
        """Remember how many entries the status code thread will report."""
        self.graph_data_total = total

    def update_progress_graph(self, progress):
        """Show the graph progress and render chart and table once complete."""
        self.statusMessage.setText(f"Progress: {progress}/{self.graph_data_total}")
        if progress != self.graph_data_total:
            return
        self.statusMessage.setText("")
        # The message disappears after 2 s on its own.  No sleep here: this
        # slot runs in the GUI thread and sleeping would freeze the window.
        self.statusBar().showMessage("Done", 2000)
        self.summary.setCurrentIndex(0)
        self.graph_data.sort(key=lambda entry: entry[1], reverse=True)
        self.create_pie_chart(
            self.graph_data, "Data for Status Code {}".format(self.status_code)
        )
        self.add_table_data(self.graph_data)

    def reset_status_codes(self):
        """Ask for confirmation, then reset all stored status codes."""
        dialog = QtWidgets.QMessageBox()
        dialog.setWindowTitle("Reset Status Codes")
        dialog.setText("Do you really want to reset all status codes?")
        dialog.setStandardButtons(
            QtWidgets.QMessageBox.StandardButton.Yes
            | QtWidgets.QMessageBox.StandardButton.No
        )
        dialog.setDefaultButton(QtWidgets.QMessageBox.StandardButton.No)
        if dialog.exec() == QtWidgets.QMessageBox.StandardButton.Yes:
            self.db.reset_status_codes()
            self.update_database_stats()

    def load_ppns(self):
        """Let the user pick text files and start one scraper thread per file."""
        filedialog = QtWidgets.QFileDialog()
        filedialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFiles)
        filedialog.setNameFilter("Text files (*.txt)")
        if not filedialog.exec():
            return
        for file in filedialog.selectedFiles():
            scrapeThread = WebscraperThread(ppnfilePath=file)
            scrapeThread.updateSignal.connect(self.update_progress_scraper)
            scrapeThread.total_entries_signal.connect(self.set_total_entries_scraper)
            scrapeThread.start()
            self.checkThreads.append(scrapeThread)
        # Refresh the status codes offered in the dropdown.
        self.check_status_code.clear()
        self.check_status_code.addItems(self.db.get_status_codes())

    def update_progress_scraper(self, progress):
        """Show the scraper progress and refresh the database statistics."""
        self.webscraperMessage.setText(f"Progress: {progress}/{self.total_scraper}")
        self.update_database_stats()

    def set_total_entries_scraper(self, total):
        """Remember how many entries the scraper thread will report."""
        self.total_scraper = total

    def update_database_stats(self):
        """Refresh the summary label and the per-status-code statistics table."""
        stats = self.db.get_status_code_counts()
        total_entries = sum(count for _, count in stats)
        total_links = self.db.get_link_count()
        ppn_count = self.db.get_unique_count_ppns()
        self.databasestats.setTextFormat(QtCore.Qt.TextFormat.RichText)
        self.databasestats.setText(
            "Es wurden <b>{}</b> Links für <b>{}</b> Medien gefunden".format(
                total_links, ppn_count
            )
        )
        # Columns: status code, number of entries, share of all entries.
        rows = [
            (code, count, str(round(count / total_entries * 100, 2)) + "%")
            for code, count in stats
        ]
        self._fill_table(self.stats_scroll_area_table, rows, 3)

    def update_eta(self, eta):
        """Placeholder; the ETA is currently not displayed."""
        pass

    @pyqtSlot()
    def check_status_code_clicked(self):
        """Start a worker thread that re-checks all links with the selected code."""
        self.progressBar_qtdm.setValue(0)
        self.progressBar_qtdm.show()
        self.check_overview.setPlainText("")
        status_code = self.check_status_code.currentText()
        if self.usePlaywright.isChecked():
            log.info("Using Playwright")
            checkThread = CheckThreadPlaywright(status_code=str(status_code))
        else:
            log.info("Using Requests")
            checkThread = CheckThread(status_code=str(status_code))
            # NOTE(review): the timers are only passed to the requests based
            # thread; the UI disables the timer box when Playwright is
            # selected (see toggle_playwright) - confirm this is intended.
            checkThread.setTimes(self.spinTimeout.value(), self.spinTimer.value())
        self.statusBar().showMessage("Checking status code", 2000)
        checkThread.updateSignal.connect(self.update_progress)
        checkThread.total_entries_signal.connect(self.set_total_entries)
        checkThread.resultSignal.connect(self.showMessage)
        checkThread.progress.connect(self.progressBar_qtdm.make_progress)
        checkThread.finished.connect(self.update_done)
        # Prevent a second check from being started while this one runs;
        # the button is re-enabled in update_done.
        self.check_code.setEnabled(False)
        self.check_overview.show()
        checkThread.start()
        self.checkThreads.append(checkThread)

    def update_done(self):
        """Reset the check controls after a check thread has finished."""
        self.check_code.setEnabled(True)
        self.progressBar_qtdm.hide()
        self.progressBar_qtdm.setValue(0)
        self.statusBar().showMessage("Done", 2000)

    def set_total_entries(self, total):
        """Show the progress bar and set its maximum to *total*."""
        self.total = total
        self.progressBar_qtdm.show()
        self.progressBar_qtdm.setMaximum(total)

    def showMessage(self, message):
        """Append *message* to the check log and scroll to its end.

        The log is cleared first once it holds more than 100 line breaks.
        """
        if self.check_overview.toPlainText().count("\n") > 100:
            self.check_overview.setPlainText("")
        self.check_overview.append(message)
        scroll_bar = self.check_overview.verticalScrollBar()
        scroll_bar.setValue(scroll_bar.maximum())

    def update_progress(self):
        """Refresh the database statistics by clicking the refresh button."""
        self.updatedatabasestats.click()

    def create_graph(self):
        """Show the status code distribution of the selected publisher."""
        self.summary.setCurrentIndex(0)
        publisher = self.publishers.currentText()
        data = self.db.get_results_by_publisher(
            publisher, distinct=self.addDuplicates.isChecked()
        )
        publisher_count = self.db.get_publisher_count(publisher)
        checked_count = self.db.get_checked_by_publisher(publisher)
        self.total_results_publisher.setText(
            "Anzahl Titel: {}, davon geprüft: {}".format(publisher_count, checked_count)
        )
        # Count the rows per response code (second field of every row).
        counts = {}
        for item in data:
            counts[item[1]] = counts.get(item[1], 0) + 1
        table_data = list(counts.items())
        self.create_pie_chart(
            table_data, title="Statistik der Status Codes für {}".format(publisher)
        )
        self.add_table_data(table_data)

    def add_table_data(self, data):
        """Show the first two fields of every entry of *data* in the result table."""
        self._fill_table(self.response_data, data, 2)

    def create_pie_chart(self, data, title="Count of Status Codes"):
        """Replace the chart in the graph frame with a pie chart of *data*.

        *data* is an iterable of ``(label, value)`` pairs; every slice is
        labelled with its label and its share in percent.
        """
        layout = self.graph_frame.layout()
        # Take every old chart out of the layout right away; deleteLater alone
        # leaves it in place until control returns to the event loop.
        while layout.count():
            old_widget = layout.takeAt(0).widget()
            if old_widget is not None:
                old_widget.deleteLater()
        series = QPieSeries()
        for item in data:
            series.append(str(item[0]), item[1])
        chart = QChart()
        chart.addSeries(series)
        chart.setAnimationOptions(QChart.AnimationOption.SeriesAnimations)
        chart.legend().setVisible(True)
        chart.legend().setAlignment(QtCore.Qt.AlignmentFlag.AlignRight)
        chart.setTitle(title)
        chartview = QChartView(chart)
        chartview.setRenderHint(QPainter.RenderHint.Antialiasing)
        layout.addWidget(chartview)
        for pie_slice in series.slices():
            pie_slice.setLabel(
                f"{pie_slice.label()} {round(pie_slice.percentage() * 100, 2)}%"
            )

    def display_detailed_overview(self, item):
        """Show the individual links behind the double-clicked result row.

        The first column of the row holds either a status code (publisher
        view) or a publisher (status code view).  A numeric value is combined
        with the publisher selected in the dropdown, any other value is used
        as publisher together with the status code chosen in show_graph.
        """
        self.summary.setCurrentIndex(1)
        status_code = self.response_data.item(item.row(), 0).text()
        if status_code.isnumeric():
            publisher = self.publishers.currentText()
        else:
            publisher = status_code
            status_code = self.status_code
        data = self.db.get_results_by_publisher_and_status(publisher, status_code)
        self._fill_table(self.detailed_overview, data, 3)
def launch():
    """Create the Qt application, show the main window and run the event loop.

    Blocks until the event loop ends; the loop's exit code is discarded.
    """
    application = QtWidgets.QApplication([])
    main_window = MainWindow()
    main_window.show()
    application.exec()

View File

@@ -0,0 +1,340 @@
# Form implementation generated from reading ui file 'c:\Users\aky547\GitHub\LinkAvailableChecker\src\ui\sources\main_interface.ui'
#
# Created by: PyQt6 UI code generator 6.7.1
#
# WARNING: Any manual changes made to this file will be lost when pyuic6 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt6 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(824, 740)
self.centralwidget = QtWidgets.QWidget(parent=MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
self.verticalLayout.setObjectName("verticalLayout")
self.horizontalLayout_4 = QtWidgets.QHBoxLayout()
self.horizontalLayout_4.setObjectName("horizontalLayout_4")
self.database_stats = QtWidgets.QGroupBox(parent=self.centralwidget)
font = QtGui.QFont()
font.setBold(True)
self.database_stats.setFont(font)
self.database_stats.setObjectName("database_stats")
self.verticalLayout_6 = QtWidgets.QVBoxLayout(self.database_stats)
self.verticalLayout_6.setObjectName("verticalLayout_6")
self.horizontalLayout_6 = QtWidgets.QHBoxLayout()
self.horizontalLayout_6.setObjectName("horizontalLayout_6")
self.load_ppn = QtWidgets.QPushButton(parent=self.database_stats)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Maximum, QtWidgets.QSizePolicy.Policy.Fixed)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.load_ppn.sizePolicy().hasHeightForWidth())
self.load_ppn.setSizePolicy(sizePolicy)
font = QtGui.QFont()
font.setBold(False)
self.load_ppn.setFont(font)
self.load_ppn.setObjectName("load_ppn")
self.horizontalLayout_6.addWidget(self.load_ppn)
self.resetStatusCodes = QtWidgets.QPushButton(parent=self.database_stats)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Maximum, QtWidgets.QSizePolicy.Policy.Fixed)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.resetStatusCodes.sizePolicy().hasHeightForWidth())
self.resetStatusCodes.setSizePolicy(sizePolicy)
font = QtGui.QFont()
font.setBold(False)
self.resetStatusCodes.setFont(font)
self.resetStatusCodes.setObjectName("resetStatusCodes")
self.horizontalLayout_6.addWidget(self.resetStatusCodes)
self.verticalLayout_6.addLayout(self.horizontalLayout_6)
self.databasestats = QtWidgets.QLabel(parent=self.database_stats)
font = QtGui.QFont()
font.setBold(False)
self.databasestats.setFont(font)
self.databasestats.setText("")
self.databasestats.setTextFormat(QtCore.Qt.TextFormat.PlainText)
self.databasestats.setObjectName("databasestats")
self.verticalLayout_6.addWidget(self.databasestats)
self.stats_scroll_area_table = QtWidgets.QTableWidget(parent=self.database_stats)
self.stats_scroll_area_table.setMinimumSize(QtCore.QSize(0, 100))
font = QtGui.QFont()
font.setBold(False)
self.stats_scroll_area_table.setFont(font)
self.stats_scroll_area_table.setFrameShape(QtWidgets.QFrame.Shape.NoFrame)
self.stats_scroll_area_table.setVerticalScrollBarPolicy(QtCore.Qt.ScrollBarPolicy.ScrollBarAsNeeded)
self.stats_scroll_area_table.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarPolicy.ScrollBarAsNeeded)
self.stats_scroll_area_table.setEditTriggers(QtWidgets.QAbstractItemView.EditTrigger.NoEditTriggers)
self.stats_scroll_area_table.setAlternatingRowColors(True)
self.stats_scroll_area_table.setObjectName("stats_scroll_area_table")
self.stats_scroll_area_table.setColumnCount(3)
self.stats_scroll_area_table.setRowCount(0)
item = QtWidgets.QTableWidgetItem()
self.stats_scroll_area_table.setHorizontalHeaderItem(0, item)
item = QtWidgets.QTableWidgetItem()
self.stats_scroll_area_table.setHorizontalHeaderItem(1, item)
item = QtWidgets.QTableWidgetItem()
self.stats_scroll_area_table.setHorizontalHeaderItem(2, item)
self.stats_scroll_area_table.horizontalHeader().setDefaultSectionSize(90)
self.stats_scroll_area_table.horizontalHeader().setMinimumSectionSize(50)
self.stats_scroll_area_table.horizontalHeader().setStretchLastSection(True)
self.stats_scroll_area_table.verticalHeader().setDefaultSectionSize(35)
self.verticalLayout_6.addWidget(self.stats_scroll_area_table)
self.updatedatabasestats = QtWidgets.QPushButton(parent=self.database_stats)
font = QtGui.QFont()
font.setBold(False)
self.updatedatabasestats.setFont(font)
self.updatedatabasestats.setObjectName("updatedatabasestats")
self.verticalLayout_6.addWidget(self.updatedatabasestats)
self.horizontalLayout_4.addWidget(self.database_stats)
self.verticalLayout_8 = QtWidgets.QVBoxLayout()
self.verticalLayout_8.setObjectName("verticalLayout_8")
self.verticalLayout_7 = QtWidgets.QVBoxLayout()
self.verticalLayout_7.setSizeConstraint(QtWidgets.QLayout.SizeConstraint.SetDefaultConstraint)
self.verticalLayout_7.setObjectName("verticalLayout_7")
self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
self.horizontalLayout_3.setObjectName("horizontalLayout_3")
self.label = QtWidgets.QLabel(parent=self.centralwidget)
self.label.setObjectName("label")
self.horizontalLayout_3.addWidget(self.label)
self.publishers = QtWidgets.QComboBox(parent=self.centralwidget)
self.publishers.setEditable(True)
self.publishers.setObjectName("publishers")
self.horizontalLayout_3.addWidget(self.publishers)
self.horizontalLayout_3.setStretch(0, 1)
self.horizontalLayout_3.setStretch(1, 5)
self.verticalLayout_7.addLayout(self.horizontalLayout_3)
self.horizontalLayout_5 = QtWidgets.QHBoxLayout()
self.horizontalLayout_5.setObjectName("horizontalLayout_5")
spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Policy.Expanding, QtWidgets.QSizePolicy.Policy.Minimum)
self.horizontalLayout_5.addItem(spacerItem)
self.addDuplicates = QtWidgets.QCheckBox(parent=self.centralwidget)
self.addDuplicates.setLayoutDirection(QtCore.Qt.LayoutDirection.LeftToRight)
self.addDuplicates.setChecked(True)
self.addDuplicates.setTristate(False)
self.addDuplicates.setObjectName("addDuplicates")
self.horizontalLayout_5.addWidget(self.addDuplicates)
self.verticalLayout_7.addLayout(self.horizontalLayout_5)
self.showResults = QtWidgets.QPushButton(parent=self.centralwidget)
self.showResults.setObjectName("showResults")
self.verticalLayout_7.addWidget(self.showResults)
self.groupBox = QtWidgets.QGroupBox(parent=self.centralwidget)
font = QtGui.QFont()
font.setBold(True)
self.groupBox.setFont(font)
self.groupBox.setObjectName("groupBox")
self.formLayout = QtWidgets.QFormLayout(self.groupBox)
self.formLayout.setObjectName("formLayout")
self.label_3 = QtWidgets.QLabel(parent=self.groupBox)
font = QtGui.QFont()
font.setBold(False)
self.label_3.setFont(font)
self.label_3.setObjectName("label_3")
self.formLayout.setWidget(0, QtWidgets.QFormLayout.ItemRole.LabelRole, self.label_3)
self.check_status_code = QtWidgets.QComboBox(parent=self.groupBox)
font = QtGui.QFont()
font.setBold(False)
self.check_status_code.setFont(font)
self.check_status_code.setObjectName("check_status_code")
self.formLayout.setWidget(0, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_status_code)
self.usePlaywright = QtWidgets.QCheckBox(parent=self.groupBox)
self.usePlaywright.setObjectName("usePlaywright")
self.formLayout.setWidget(1, QtWidgets.QFormLayout.ItemRole.LabelRole, self.usePlaywright)
self.check_code = QtWidgets.QPushButton(parent=self.groupBox)
font = QtGui.QFont()
font.setBold(False)
self.check_code.setFont(font)
self.check_code.setObjectName("check_code")
self.formLayout.setWidget(1, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_code)
self.groupBox_2 = QtWidgets.QGroupBox(parent=self.groupBox)
self.groupBox_2.setObjectName("groupBox_2")
self.verticalLayout_9 = QtWidgets.QVBoxLayout(self.groupBox_2)
self.verticalLayout_9.setObjectName("verticalLayout_9")
self.label_4 = QtWidgets.QLabel(parent=self.groupBox_2)
font = QtGui.QFont()
font.setBold(False)
self.label_4.setFont(font)
self.label_4.setObjectName("label_4")
self.verticalLayout_9.addWidget(self.label_4)
self.spinTimer = QtWidgets.QSpinBox(parent=self.groupBox_2)
font = QtGui.QFont()
font.setBold(False)
self.spinTimer.setFont(font)
self.spinTimer.setButtonSymbols(QtWidgets.QAbstractSpinBox.ButtonSymbols.UpDownArrows)
self.spinTimer.setMinimum(1)
self.spinTimer.setMaximum(60)
self.spinTimer.setObjectName("spinTimer")
self.verticalLayout_9.addWidget(self.spinTimer)
self.label_5 = QtWidgets.QLabel(parent=self.groupBox_2)
font = QtGui.QFont()
font.setBold(False)
self.label_5.setFont(font)
self.label_5.setObjectName("label_5")
self.verticalLayout_9.addWidget(self.label_5)
self.spinTimeout = QtWidgets.QSpinBox(parent=self.groupBox_2)
font = QtGui.QFont()
font.setBold(False)
self.spinTimeout.setFont(font)
self.spinTimeout.setButtonSymbols(QtWidgets.QAbstractSpinBox.ButtonSymbols.UpDownArrows)
self.spinTimeout.setMinimum(5)
self.spinTimeout.setMaximum(100)
self.spinTimeout.setObjectName("spinTimeout")
self.verticalLayout_9.addWidget(self.spinTimeout)
spacerItem1 = QtWidgets.QSpacerItem(20, 40, QtWidgets.QSizePolicy.Policy.Minimum, QtWidgets.QSizePolicy.Policy.Expanding)
self.verticalLayout_9.addItem(spacerItem1)
self.formLayout.setWidget(2, QtWidgets.QFormLayout.ItemRole.LabelRole, self.groupBox_2)
self.check_overview = QtWidgets.QTextBrowser(parent=self.groupBox)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Expanding, QtWidgets.QSizePolicy.Policy.Maximum)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.check_overview.sizePolicy().hasHeightForWidth())
self.check_overview.setSizePolicy(sizePolicy)
self.check_overview.setMinimumSize(QtCore.QSize(0, 100))
self.check_overview.setMaximumSize(QtCore.QSize(16777215, 130))
font = QtGui.QFont()
font.setBold(False)
self.check_overview.setFont(font)
self.check_overview.setOpenExternalLinks(True)
self.check_overview.setObjectName("check_overview")
self.formLayout.setWidget(2, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_overview)
self.horizontalLayout_7 = QtWidgets.QHBoxLayout()
self.horizontalLayout_7.setObjectName("horizontalLayout_7")
self.widget = QtWidgets.QWidget(parent=self.groupBox)
self.widget.setMinimumSize(QtCore.QSize(0, 30))
self.widget.setObjectName("widget")
self.horizontalLayout_7.addWidget(self.widget)
self.formLayout.setLayout(3, QtWidgets.QFormLayout.ItemRole.FieldRole, self.horizontalLayout_7)
self.verticalLayout_7.addWidget(self.groupBox)
self.verticalLayout_8.addLayout(self.verticalLayout_7)
self.horizontalLayout_4.addLayout(self.verticalLayout_8)
self.horizontalLayout_4.setStretch(0, 1)
self.horizontalLayout_4.setStretch(1, 2)
self.verticalLayout.addLayout(self.horizontalLayout_4)
self.widget1 = QtWidgets.QWidget(parent=self.centralwidget)
self.widget1.setObjectName("widget1")
self.horizontalLayout = QtWidgets.QHBoxLayout(self.widget1)
self.horizontalLayout.setObjectName("horizontalLayout")
self.graph_frame = QtWidgets.QFrame(parent=self.widget1)
self.graph_frame.setFrameShape(QtWidgets.QFrame.Shape.StyledPanel)
self.graph_frame.setFrameShadow(QtWidgets.QFrame.Shadow.Raised)
self.graph_frame.setObjectName("graph_frame")
self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.graph_frame)
self.verticalLayout_2.setContentsMargins(2, 2, 2, 2)
self.verticalLayout_2.setSpacing(0)
self.verticalLayout_2.setObjectName("verticalLayout_2")
self.horizontalLayout.addWidget(self.graph_frame)
self.chart_frame = QtWidgets.QFrame(parent=self.widget1)
self.chart_frame.setFrameShape(QtWidgets.QFrame.Shape.StyledPanel)
self.chart_frame.setFrameShadow(QtWidgets.QFrame.Shadow.Raised)
self.chart_frame.setObjectName("chart_frame")
self.verticalLayout_3 = QtWidgets.QVBoxLayout(self.chart_frame)
self.verticalLayout_3.setContentsMargins(2, 2, 2, 2)
self.verticalLayout_3.setSpacing(0)
self.verticalLayout_3.setObjectName("verticalLayout_3")
self.summary = QtWidgets.QStackedWidget(parent=self.chart_frame)
self.summary.setObjectName("summary")
self.page = QtWidgets.QWidget()
self.page.setObjectName("page")
self.verticalLayout_5 = QtWidgets.QVBoxLayout(self.page)
self.verticalLayout_5.setObjectName("verticalLayout_5")
self.response_data = QtWidgets.QTableWidget(parent=self.page)
self.response_data.setEditTriggers(QtWidgets.QAbstractItemView.EditTrigger.NoEditTriggers)
self.response_data.setAlternatingRowColors(True)
self.response_data.setObjectName("response_data")
self.response_data.setColumnCount(2)
self.response_data.setRowCount(0)
item = QtWidgets.QTableWidgetItem()
self.response_data.setHorizontalHeaderItem(0, item)
item = QtWidgets.QTableWidgetItem()
self.response_data.setHorizontalHeaderItem(1, item)
self.verticalLayout_5.addWidget(self.response_data)
self.total_results_publisher = QtWidgets.QLabel(parent=self.page)
self.total_results_publisher.setText("")
self.total_results_publisher.setObjectName("total_results_publisher")
self.verticalLayout_5.addWidget(self.total_results_publisher)
self.exportAll = QtWidgets.QPushButton(parent=self.page)
self.exportAll.setObjectName("exportAll")
self.verticalLayout_5.addWidget(self.exportAll)
self.summary.addWidget(self.page)
self.page_2 = QtWidgets.QWidget()
self.page_2.setObjectName("page_2")
self.verticalLayout_4 = QtWidgets.QVBoxLayout(self.page_2)
self.verticalLayout_4.setObjectName("verticalLayout_4")
self.detailed_overview = QtWidgets.QTableWidget(parent=self.page_2)
self.detailed_overview.setObjectName("detailed_overview")
self.detailed_overview.setColumnCount(3)
self.detailed_overview.setRowCount(0)
item = QtWidgets.QTableWidgetItem()
self.detailed_overview.setHorizontalHeaderItem(0, item)
item = QtWidgets.QTableWidgetItem()
self.detailed_overview.setHorizontalHeaderItem(1, item)
item = QtWidgets.QTableWidgetItem()
self.detailed_overview.setHorizontalHeaderItem(2, item)
self.verticalLayout_4.addWidget(self.detailed_overview)
self.exportBtn = QtWidgets.QPushButton(parent=self.page_2)
self.exportBtn.setObjectName("exportBtn")
self.verticalLayout_4.addWidget(self.exportBtn)
self.summary.addWidget(self.page_2)
self.verticalLayout_3.addWidget(self.summary)
self.horizontalLayout.addWidget(self.chart_frame)
self.verticalLayout.addWidget(self.widget1)
self.verticalLayout.setStretch(0, 1)
self.verticalLayout.setStretch(1, 4)
MainWindow.setCentralWidget(self.centralwidget)
self.statusbar = QtWidgets.QStatusBar(parent=MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.actionPPNs_laden = QtGui.QAction(parent=MainWindow)
self.actionPPNs_laden.setObjectName("actionPPNs_laden")
self.actionStatus_Code_zur_cksetzen = QtGui.QAction(parent=MainWindow)
self.actionStatus_Code_zur_cksetzen.setObjectName("actionStatus_Code_zur_cksetzen")
self.retranslateUi(MainWindow)
self.summary.setCurrentIndex(0)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
# Assigns every user-visible string of the main window (window title, group-box
# titles, button/label texts, tooltips and table header captions).
# Each string is routed through QCoreApplication.translate with the context
# "MainWindow". NOTE(review): this looks like pyuic output for the .ui file that
# follows in this commit - presumably it should be regenerated rather than edited
# by hand; confirm before changing strings here.
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
# --- database statistics box: buttons and the three-column statistics table ---
self.database_stats.setTitle(_translate("MainWindow", "Statistik der Datenbank"))
self.load_ppn.setText(_translate("MainWindow", "PPNs laden"))
self.resetStatusCodes.setText(_translate("MainWindow", "Status Codes löschen"))
item = self.stats_scroll_area_table.horizontalHeaderItem(0)
item.setText(_translate("MainWindow", "Status Code"))
item = self.stats_scroll_area_table.horizontalHeaderItem(1)
item.setText(_translate("MainWindow", "Anzahl Treffer"))
item = self.stats_scroll_area_table.horizontalHeaderItem(2)
item.setText(_translate("MainWindow", "%"))
self.updatedatabasestats.setText(_translate("MainWindow", "Aktualisieren"))
# --- publisher selection and result display controls ---
self.label.setText(_translate("MainWindow", "Verlag"))
self.addDuplicates.setText(_translate("MainWindow", "Dubletten entfernen"))
self.showResults.setText(_translate("MainWindow", "Daten anzeigen"))
# --- status-code check box: code selector, browser toggle and start button ---
self.groupBox.setTitle(_translate("MainWindow", "Statuscode(s) prüfen"))
self.label_3.setText(_translate("MainWindow", "Status Code"))
self.usePlaywright.setToolTip(_translate("MainWindow", "Verwendet einen emulierten Browser, um die Seiten zu prüfen, dauert länger, kein ETA berechenbar"))
# The checkbox caption is a two-line string (adjacent literals are concatenated).
self.usePlaywright.setText(_translate("MainWindow", "Browser\n"
"verwenden"))
self.check_code.setText(_translate("MainWindow", "Prüfen"))
# --- timer settings: labels and tooltips for the sleep and timeout spin boxes ---
self.groupBox_2.setTitle(_translate("MainWindow", "Timers"))
self.label_4.setToolTip(_translate("MainWindow", "Pause zwischen den Anfragen"))
self.label_4.setText(_translate("MainWindow", "Sleep Timer"))
self.label_5.setToolTip(_translate("MainWindow", "Limit, ab wann eine Anfrage als ungültig gewertet wird"))
self.label_5.setText(_translate("MainWindow", "Timeout"))
# --- summary page: response-code table (sorting enabled) and export button ---
self.response_data.setSortingEnabled(True)
item = self.response_data.horizontalHeaderItem(0)
item.setText(_translate("MainWindow", "Response Code"))
item = self.response_data.horizontalHeaderItem(1)
item.setText(_translate("MainWindow", "Anzahl Treffer"))
self.exportAll.setText(_translate("MainWindow", "Alles exportieren"))
# --- detail page: per-link table headers and export button ---
item = self.detailed_overview.horizontalHeaderItem(0)
item.setText(_translate("MainWindow", "PPN"))
item = self.detailed_overview.horizontalHeaderItem(1)
item.setText(_translate("MainWindow", "Unser Link"))
item = self.detailed_overview.horizontalHeaderItem(2)
item.setText(_translate("MainWindow", "Ergebnis"))
self.exportBtn.setText(_translate("MainWindow", "Daten exportieren"))
# --- actions ---
self.actionPPNs_laden.setText(_translate("MainWindow", "PPNs laden"))
self.actionStatus_Code_zur_cksetzen.setText(_translate("MainWindow", "Status Code zurücksetzen"))

View File

@@ -0,0 +1,573 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>824</width>
<height>740</height>
</rect>
</property>
<property name="windowTitle">
<string>MainWindow</string>
</property>
<widget class="QWidget" name="centralwidget">
<layout class="QVBoxLayout" name="verticalLayout" stretch="1,4">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_4" stretch="1,2">
<item>
<widget class="QGroupBox" name="database_stats">
<property name="font">
<font>
<bold>true</bold>
</font>
</property>
<property name="title">
<string>Statistik der Datenbank</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_6">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_6">
<item>
<widget class="QPushButton" name="load_ppn">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string>PPNs laden</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="resetStatusCodes">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string>Status Codes löschen</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QLabel" name="databasestats">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string/>
</property>
<property name="textFormat">
<enum>Qt::PlainText</enum>
</property>
</widget>
</item>
<item>
<widget class="QTableWidget" name="stats_scroll_area_table">
<property name="minimumSize">
<size>
<width>0</width>
<height>100</height>
</size>
</property>
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="verticalScrollBarPolicy">
<enum>Qt::ScrollBarAsNeeded</enum>
</property>
<property name="horizontalScrollBarPolicy">
<enum>Qt::ScrollBarAsNeeded</enum>
</property>
<property name="editTriggers">
<set>QAbstractItemView::NoEditTriggers</set>
</property>
<property name="alternatingRowColors">
<bool>true</bool>
</property>
<attribute name="horizontalHeaderMinimumSectionSize">
<number>50</number>
</attribute>
<attribute name="horizontalHeaderDefaultSectionSize">
<number>90</number>
</attribute>
<attribute name="horizontalHeaderStretchLastSection">
<bool>true</bool>
</attribute>
<attribute name="verticalHeaderDefaultSectionSize">
<number>35</number>
</attribute>
<column>
<property name="text">
<string>Status Code</string>
</property>
</column>
<column>
<property name="text">
<string>Anzahl Treffer</string>
</property>
</column>
<column>
<property name="text">
<string>%</string>
</property>
</column>
</widget>
</item>
<item>
<widget class="QPushButton" name="updatedatabasestats">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string>Aktualisieren</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<layout class="QVBoxLayout" name="verticalLayout_8">
<item>
<layout class="QVBoxLayout" name="verticalLayout_7">
<property name="sizeConstraint">
<enum>QLayout::SetDefaultConstraint</enum>
</property>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_3" stretch="1,5">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>Verlag</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="publishers">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_5">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QCheckBox" name="addDuplicates">
<property name="layoutDirection">
<enum>Qt::LeftToRight</enum>
</property>
<property name="text">
<string>Dubletten entfernen</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
<property name="tristate">
<bool>false</bool>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QPushButton" name="showResults">
<property name="text">
<string>Daten anzeigen</string>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="font">
<font>
<bold>true</bold>
</font>
</property>
<property name="title">
<string>Statuscode prüfen</string>
</property>
<layout class="QFormLayout" name="formLayout">
<item row="0" column="0">
<widget class="QLabel" name="label_3">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string>Status Code</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="check_status_code">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="usePlaywright">
<property name="toolTip">
<string>Verwendet einen emulierten Browser, um die Seiten zu prüfen, dauert länger, kein ETA berechenbar</string>
</property>
<property name="text">
<string>Browser
verwenden</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="check_code">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="text">
<string>Prüfen</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Timers</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_9">
<item>
<widget class="QLabel" name="label_4">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="toolTip">
<string>Pause zwischen den Anfragen</string>
</property>
<property name="text">
<string>Sleep Timer</string>
</property>
</widget>
</item>
<item>
<widget class="QSpinBox" name="spinTimer">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="buttonSymbols">
<enum>QAbstractSpinBox::UpDownArrows</enum>
</property>
<property name="minimum">
<number>1</number>
</property>
<property name="maximum">
<number>60</number>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="label_5">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="toolTip">
<string>Limit, ab wann eine Anfrage als ungültig gewertet wird</string>
</property>
<property name="text">
<string>Timeout</string>
</property>
</widget>
</item>
<item>
<widget class="QSpinBox" name="spinTimeout">
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="buttonSymbols">
<enum>QAbstractSpinBox::UpDownArrows</enum>
</property>
<property name="minimum">
<number>5</number>
</property>
<property name="maximum">
<number>100</number>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="2" column="1">
<widget class="QTextBrowser" name="check_overview">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Maximum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>0</width>
<height>100</height>
</size>
</property>
<property name="maximumSize">
<size>
<width>16777215</width>
<height>130</height>
</size>
</property>
<property name="font">
<font>
<bold>false</bold>
</font>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
<item row="3" column="1">
<layout class="QHBoxLayout" name="horizontalLayout_7">
<item>
<widget class="QWidget" name="widget" native="true">
<property name="minimumSize">
<size>
<width>0</width>
<height>30</height>
</size>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</item>
<item>
<widget class="QWidget" name="widget1" native="true">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QFrame" name="graph_frame">
<property name="frameShape">
<enum>QFrame::StyledPanel</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Raised</enum>
</property>
<layout class="QVBoxLayout" name="verticalLayout_2">
<property name="spacing">
<number>0</number>
</property>
<property name="leftMargin">
<number>2</number>
</property>
<property name="topMargin">
<number>2</number>
</property>
<property name="rightMargin">
<number>2</number>
</property>
<property name="bottomMargin">
<number>2</number>
</property>
</layout>
</widget>
</item>
<item>
<widget class="QFrame" name="chart_frame">
<property name="frameShape">
<enum>QFrame::StyledPanel</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Raised</enum>
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">
<property name="spacing">
<number>0</number>
</property>
<property name="leftMargin">
<number>2</number>
</property>
<property name="topMargin">
<number>2</number>
</property>
<property name="rightMargin">
<number>2</number>
</property>
<property name="bottomMargin">
<number>2</number>
</property>
<item>
<widget class="QStackedWidget" name="summary">
<property name="currentIndex">
<number>0</number>
</property>
<widget class="QWidget" name="page">
<layout class="QVBoxLayout" name="verticalLayout_5">
<item>
<widget class="QTableWidget" name="response_data">
<property name="editTriggers">
<set>QAbstractItemView::NoEditTriggers</set>
</property>
<property name="alternatingRowColors">
<bool>true</bool>
</property>
<property name="sortingEnabled">
<bool>true</bool>
</property>
<column>
<property name="text">
<string>Response Code</string>
</property>
</column>
<column>
<property name="text">
<string>Anzahl Treffer</string>
</property>
</column>
</widget>
</item>
<item>
<widget class="QLabel" name="total_results_publisher">
<property name="text">
<string/>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="exportAll">
<property name="text">
<string>Alles exportieren</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="page_2">
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<widget class="QTableWidget" name="detailed_overview">
<column>
<property name="text">
<string>PPN</string>
</property>
</column>
<column>
<property name="text">
<string>Unser Link</string>
</property>
</column>
<column>
<property name="text">
<string>Ergebnis</string>
</property>
</column>
</widget>
</item>
<item>
<widget class="QPushButton" name="exportBtn">
<property name="text">
<string>Daten exportieren</string>
</property>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<widget class="QStatusBar" name="statusbar"/>
<action name="actionPPNs_laden">
<property name="text">
<string>PPNs laden</string>
</property>
</action>
<action name="actionStatus_Code_zur_cksetzen">
<property name="text">
<string>Status Code zurücksetzen</string>
</property>
</action>
</widget>
<resources/>
<connections/>
</ui>

495
src/ui/threads.py Normal file
View File

@@ -0,0 +1,495 @@
from PyQt6 import QtCore, QtWidgets
from PyQt6.QtCore import QThread, pyqtSignal, QTimer
from src.database import Database
import time
import loguru
import xml.etree.ElementTree as ET
import requests
from ratelimit import limits, sleep_and_retry
from datetime import timedelta
from datetime import datetime
import asyncio
from playwright.async_api import async_playwright
import tqdm
import os
# Module-wide logger: remove the handlers loguru installs by default and write
# to "status_code.log" instead, rotating the file at 100 MB.
log = loguru.logger
log.remove()
log.add("status_code.log", rotation="100 MB")
class Qtqdm(tqdm.std.tqdm):
    """tqdm progress bar that reports its state through a callback.

    The textual bar is discarded by default (written to ``os.devnull``);
    instead, every ``refresh()`` passes ``self.format_dict`` to
    ``update_callback`` so a GUI widget can render the progress.

    Parameters are those of ``tqdm.std.tqdm`` plus:

    file:
        Stream for the textual bar. ``None`` (the default) means "discard":
        a shared handle on ``os.devnull`` is opened on first use. Pass a real
        stream (e.g. ``sys.stderr``) to get tqdm's normal output as well.
    update_callback:
        Optional callable receiving the ``format_dict`` mapping on each refresh.

    ``dynamic_ncols`` is accepted for signature compatibility but always
    forwarded as ``False``.
    """

    # Shared write-only sink for all instances; opened lazily so importing the
    # module does not open a file handle.
    _null_sink = None

    def __init__(
        self,
        iterable=None,
        desc=None,
        total=None,
        leave=True,
        file=None,
        ncols=None,
        mininterval=0.1,
        maxinterval=10.0,
        miniters=None,
        ascii=None,
        disable=False,
        unit="it",
        unit_scale=False,
        dynamic_ncols=False,
        smoothing=0.3,
        bar_format=None,
        initial=0,
        position=None,
        postfix=None,
        unit_divisor=1000,
        write_bytes=None,
        lock_args=None,
        nrows=None,
        colour=None,
        delay=0,
        gui=False,
        update_callback=None,
        **kwargs,
    ):
        # Set before calling the base constructor: refresh() reads this
        # attribute and may already run while tqdm is initialising.
        self._update_callback = update_callback
        if file is None:
            file = self._get_null_sink()
        # Forward by keyword. Positional forwarding silently shifted `gui`
        # into the `write_bytes` slot and dropped write_bytes, lock_args,
        # nrows, colour and delay.
        super().__init__(
            iterable=iterable,
            desc=desc,
            total=total,
            leave=leave,
            file=file,
            ncols=ncols,
            mininterval=mininterval,
            maxinterval=maxinterval,
            miniters=miniters,
            ascii=ascii,
            disable=disable,
            unit=unit,
            unit_scale=unit_scale,
            dynamic_ncols=False,  # terminal width is irrelevant for a GUI bar
            smoothing=smoothing,
            bar_format=bar_format,
            initial=initial,
            position=position,
            postfix=postfix,
            unit_divisor=unit_divisor,
            write_bytes=write_bytes,
            lock_args=lock_args,
            nrows=nrows,
            colour=colour,
            delay=delay,
            gui=gui,
            **kwargs,
        )

    @classmethod
    def _get_null_sink(cls):
        """Return the shared ``os.devnull`` handle, (re)opening it if needed."""
        if cls._null_sink is None or cls._null_sink.closed:
            cls._null_sink = open(os.devnull, "w")
        return cls._null_sink

    def refresh(self, nolock=False, lock_args=None):
        """Refresh the bar, then push the current ``format_dict`` to the callback."""
        super().refresh(nolock=nolock, lock_args=lock_args)
        if self._update_callback:
            self._update_callback(self.format_dict)
class ETAManager(QtCore.QObject):
    """Owns a countdown worker on its own QThread and collects request timings.

    The worker's ``etaSignal`` is re-emitted through this object's
    ``etaSignal`` (a formatted time string).

    Parameters
    ----------
    links:
        Sized collection of links still to be processed; its length is used
        by ``estimate_remaining_time``.
    """

    etaSignal = QtCore.pyqtSignal(str)

    def __init__(self, links):
        super().__init__()
        self.remaining_time = 0
        self.links = links
        self.running = True
        self.times = []
        self.eta_thread = QtCore.QThread()
        # Create the worker with the start time only and hand over the links
        # through its `check_links` attribute; passing them as a second
        # positional argument raised a TypeError.
        self.eta_worker = CountdownManagedWorker(self.remaining_time)
        self.eta_worker.check_links = links
        self.eta_worker.moveToThread(self.eta_thread)
        self.eta_worker.etaSignal.connect(self.etaSignal.emit)
        self.eta_thread.started.connect(self.eta_worker.run)

    def start(self):
        """Start the ETA thread."""
        self.eta_thread.start()

    def calculate_average(self):
        """Return the mean of the recorded timings, or 0 when none exist."""
        if not self.times:
            return 0
        return sum(self.times) / len(self.times)

    def estimate_remaining_time(self):
        """Return average timing multiplied by the number of links."""
        # Uses self.links; the attribute `check_links` never existed on this class.
        return self.calculate_average() * len(self.links)

    def add_timestamp(self, timestamp):
        """Record one timing sample for the running average."""
        self.times.append(timestamp)
class CountdownManagedWorker(QtCore.QObject):
    """Countdown worker that keeps emitting an ETA until it is stopped.

    Unlike a plain countdown it does not end when the time reaches zero; it
    runs until ``stop()`` is called, so an owner can keep adjusting
    ``remaining_time`` while work is in progress.

    Parameters
    ----------
    total_time:
        Initial remaining time in seconds.
    check_links:
        Optional collection of links associated with this countdown; only
        stored on the instance.
    """

    etaSignal = QtCore.pyqtSignal(str)

    def __init__(self, total_time, check_links=None):
        super().__init__()
        self.remaining_time = total_time
        self.check_links = check_links
        self.running = True
        self.times = []

    def run(self):
        """Emit the formatted remaining time once per second until stopped."""
        while self.running:
            # Clamp for display: a negative timedelta would render as
            # "-1 day, 23:59:59" once the estimate is exceeded.
            seconds_left = max(0, int(self.remaining_time))
            self.etaSignal.emit(str(timedelta(seconds=seconds_left)))
            time.sleep(1)
            self.remaining_time -= 1
        self.etaSignal.emit("00:00:00")

    def stop(self):
        """Ask the countdown loop to finish after its current one-second tick."""
        self.running = False
class CountdownWorker(QtCore.QObject):
    """ETA countdown meant to be moved onto its own QThread.

    Emits the remaining time as a formatted string once per second and a
    final "00:00:00" when the countdown ends or is stopped.
    """

    etaSignal = QtCore.pyqtSignal(str)

    def __init__(self, total_time):
        super().__init__()
        self.running = True
        self.remaining_time = total_time

    def run(self):
        """Tick once per second while time is left and the worker is running."""
        while True:
            if not (self.remaining_time > 0 and self.running):
                break
            whole_seconds = int(self.remaining_time)
            self.etaSignal.emit(str(timedelta(seconds=whole_seconds)))
            time.sleep(1)
            self.remaining_time -= 1
        self.etaSignal.emit("00:00:00")

    def update_remaining_time(self, remaining_time):
        """Replace the remaining time; negative values are stored as 0."""
        self.remaining_time = remaining_time if remaining_time > 0 else 0

    def stop(self):
        """Request the countdown loop to end."""
        self.running = False
class CheckThread(QtCore.QThread):
    """Worker thread that re-requests every stored link with a given response code.

    Links are read from "lfer.db" via ``get_links_by_response_code``; each is
    fetched with ``requests`` and the new status code and final URL are
    written back.

    Signals
    -------
    total_entries_signal(int): number of links selected for checking.
    startSignal(): emitted once right before the first request.
    resultSignal(str): "<url> : <code>" after each request.
    updateSignal(): emitted after each processed link.
    progress(dict): tqdm ``format_dict`` forwarded on each progress refresh.
    etaSignal(dict): ``{"text": "Done"}`` when there is nothing to check.
    """

    updateSignal = QtCore.pyqtSignal()
    total_entries_signal = QtCore.pyqtSignal(int)
    resultSignal = QtCore.pyqtSignal(str)
    etaSignal = QtCore.pyqtSignal(dict)
    startSignal = QtCore.pyqtSignal()
    progress = pyqtSignal(dict)

    def __init__(self, parent=None, status_code=None):
        super().__init__(parent)
        self.check_code = None
        self.status_code = status_code
        self.sleepTimer = 0
        self.timeout = 0
        self.per_request_time = sum([self.sleepTimer, self.timeout])
        self.running = True
        self.eta_worker = None
        self.eta_thread = None

    def set_status_code(self, status_code):
        """Select which stored response code should be re-checked."""
        self.status_code = status_code

    def setTimes(self, timeout, sleepTimer):
        """Set the request timeout and the pause between requests (seconds)."""
        self.timeout = timeout
        self.sleepTimer = sleepTimer
        self.per_request_time = sum([self.sleepTimer, self.timeout])

    def run(self):
        """Check all selected links, updating the database after each one."""
        self.db = Database("lfer.db")
        try:
            links = self.db.get_links_by_response_code(self.status_code)
            self.total_entries_signal.emit(len(links))
            if not links:
                self.etaSignal.emit({"text": "Done"})
                return
            progress_bar = Qtqdm(
                links, unit_scale=True, update_callback=self._update_callback
            )
            self.startSignal.emit()
            try:
                for link_id, url in progress_bar:
                    if not self.running:
                        break
                    response_code, destination_link = self.get_status_code(url)
                    self.db.update_response_code(
                        link_id, response_code, destination_link
                    )
                    self.resultSignal.emit(f"{url} : {response_code}")
                    self.updateSignal.emit()
                    time.sleep(self.sleepTimer)
            finally:
                # Release the bar explicitly; leaving the loop via `break`
                # does not close it.
                progress_bar.close()
        finally:
            # Close the connection on every exit path, including the early
            # return above and errors raised while processing.
            self.db.close()

    def _update_callback(self, status):
        """Forward the progress bar's state dict through the `progress` signal."""
        self.progress.emit(status)

    def stop(self):
        """Stops the processing; the loop ends before the next request."""
        self.running = False

    # NOTE(review): sleep_and_retry is applied without a @limits decorator, so
    # it presumably never triggers here - confirm whether a rate limit was intended.
    @log.catch()
    @sleep_and_retry
    def get_status_code(self, url):
        """Request ``url`` and return ``(status_code, final_url_or_message)``.

        Special codes: -2 for unsupported targets (d-nb.info, image files),
        -1 when the stored "link" is an error marker, 0 when the request
        failed (the second element then holds the exception text).
        """
        non_support = ["d-nb.info", ".jpg", ".png", ".jpeg"]
        if any(x in url for x in non_support):
            log.error(f"URL: {url}, ERROR: Site not supported")
            return -2, "Site not supported"
        if "Error" in url:
            log.error(f"URL: {url}, ERROR: No data found")
            return -1, "No data found"
        try:
            userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
            accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
            headers = {"User-Agent": userAgent, "Accept": accept}
            response = requests.get(url, headers=headers, timeout=self.timeout)
            log.info(f"URL: {url}, Status Code: {response.status_code}")
            return response.status_code, response.url
        except Exception as e:
            # Any failure (DNS, TLS, timeout, invalid URL, ...) is recorded as code 0.
            log.error(f"URL: {url}, Status Code: 0")
            return 0, str(e)
class WebscraperThread(QtCore.QThread):
    """Worker thread that looks up the links for each PPN in a text file.

    The file holds one PPN per line. Each PPN is queried against the SRU
    endpoint, the URLs from MARC field 856 subfield u are extracted and stored
    in "lfer.db". PPNs without any link are stored with the marker
    "Error: No data found".

    Signals
    -------
    total_entries_signal(int): number of PPNs that will be processed.
    updateSignal(int): 1-based count of PPNs processed so far.
    """

    updateSignal = QtCore.pyqtSignal(int)
    total_entries_signal = QtCore.pyqtSignal(int)

    # Seconds to wait for the SRU endpoint before giving up on one PPN.
    REQUEST_TIMEOUT = 30

    def __init__(self, parent=None, ppnfilePath=None):
        super().__init__(parent)
        self.ppnfilePath = ppnfilePath

    def set_ppnfilePath(self, ppnfilePath):
        """Set the path of the PPN list file."""
        self.ppnfilePath = ppnfilePath

    def run(self):
        """Fetch and store the links for every PPN in the file."""
        self.db = Database("lfer.db")
        try:
            with open(self.ppnfilePath, "r") as f:
                # Skip blank lines (e.g. a trailing newline); they would
                # otherwise be queried and stored as an empty PPN.
                ppns = [line.strip() for line in f if line.strip()]
            self.total_entries_signal.emit(len(ppns))
            for position, ppn in enumerate(ppns, start=1):
                links = self.process_response(self.fetch_data(ppn))
                if links is None:
                    self.db.add_data(ppn, "Error: No data found")
                else:
                    for link in links:
                        self.db.add_data(ppn, link)
                self.updateSignal.emit(position)
                time.sleep(0.1)
        finally:
            self.db.close()

    @sleep_and_retry
    @limits(calls=10, period=1)
    def fetch_data(self, ppn):
        """Return the SRU response body for ``ppn``, or "" if the request failed.

        An empty string is not parseable XML, so ``process_response`` reports
        such a PPN as having no data.
        """
        api_url = f"https://sru.bsz-bw.de/swb?version=1.1&query=pica.ppn%3D{ppn}&operation=searchRetrieve&maximumRecords=10&recordSchema=marcxmlk10os"
        try:
            response = requests.get(api_url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            log.error(f"PPN: {ppn}, ERROR: request failed: {e}")
            return ""
        return response.text

    def process_response(self, response):
        """Extracts URLs from datafield 856, subfield u.

        Returns a list of non-empty URL strings, or ``None`` when the response
        is not valid XML, contains no recordData element, or has no such URL.
        """
        try:
            root = ET.fromstring(response)
        except ET.ParseError:
            return None
        namespace = {
            "zs": "http://www.loc.gov/zing/srw/",
            "marc": "http://www.loc.gov/MARC21/slim",
        }
        # Only the first recordData element is inspected.
        record_data = root.find(".//zs:recordData", namespace)
        if record_data is None:
            return None
        links = [
            subfield.text.strip()
            for datafield in record_data.findall(
                ".//marc:datafield[@tag='856']", namespace
            )
            for subfield in datafield.findall("marc:subfield[@code='u']", namespace)
            # An empty <subfield/> has text None; it is not a usable link.
            if subfield.text and subfield.text.strip()
        ]
        return links if links else None
class StatusCodeThread(QtCore.QThread):
    """Worker thread that reports, per publisher, the link count for one status code.

    Signals
    -------
    total_entries_signal(int): number of publishers that will be queried.
    current_data_signal(tuple): ``(publisher, first element of the query result)``.
    progressSignal(int): 1-based count of publishers processed so far.
    """

    progressSignal = QtCore.pyqtSignal(int)
    total_entries_signal = QtCore.pyqtSignal(int)
    current_data_signal = QtCore.pyqtSignal(tuple)

    def __init__(self, parent=None, status_code=None):
        super().__init__(parent)
        self.status_code = status_code

    def set_status_code(self, status_code):
        """Select the status code the counts are collected for."""
        self.status_code = status_code

    def run(self):
        """Query every publisher and emit its result together with the progress."""
        self.db = Database("lfer.db")
        try:
            publishers = self.db.get_publishers()
            self.total_entries_signal.emit(len(publishers))
            for position, publisher in enumerate(publishers, start=1):
                data = self.db.get_num_of_links_for_status_code_and_publisher(
                    publisher, self.status_code
                )
                self.current_data_signal.emit((publisher, data[0]))
                self.progressSignal.emit(position)
        finally:
            # The connection was previously never closed.
            self.db.close()
class CheckThreadPlaywright(QtCore.QThread):
    """Worker thread that re-checks stored links with a headless Chromium.

    Works like the requests-based checker but loads each URL through
    Playwright. An ETA countdown runs on a helper QThread and is corrected
    after every request.

    Signals
    -------
    total_entries_signal(int): number of links selected for checking.
    updateSignal(int): 1-based count of links processed so far.
    resultSignal(str): "<url> : <code>" after each request.
    etaSignal(str): formatted remaining time, or "Done" when nothing to check.
    """

    updateSignal = QtCore.pyqtSignal(int)
    total_entries_signal = QtCore.pyqtSignal(int)
    resultSignal = QtCore.pyqtSignal(str)
    etaSignal = QtCore.pyqtSignal(str)

    def __init__(self, parent=None, status_code=None):
        super().__init__(parent)
        self.check_code = None
        self.status_code = status_code
        self.running = True
        self.eta_worker = None
        self.eta_thread = None
        self.browser = None  # Browser will be initialized asynchronously
        # Defaults so these attributes exist even if setTimes() is never called.
        self.timeout = 0
        self.sleepTimer = 0
        self.per_request_time = 0

    def set_status_code(self, status_code):
        """Select which stored response code should be re-checked."""
        self.status_code = status_code

    def setTimes(self, timeout, sleepTimer):
        """Store timeout and sleep values.

        NOTE(review): neither value is applied to the Playwright navigation;
        confirm whether ``timeout`` should be passed to ``page.goto``.
        """
        self.timeout = timeout
        self.sleepTimer = sleepTimer
        self.per_request_time = sum([self.sleepTimer, self.timeout])

    async def getBrowser(self):
        """Asynchronously launches Playwright browser"""
        self.playwright = await async_playwright().start()
        browser = await self.playwright.chromium.launch()
        return browser

    async def get_page_status(self, browser, url):
        """Load ``url`` in a fresh page and return ``(status_code, final_url)``.

        Returns ``(-3, url)`` both when navigation yields no response (for
        example when the target is served as a download such as a PDF) and
        when loading raises an error. The page is always closed.
        """
        page = await browser.new_page()
        try:
            response = await page.goto(url, wait_until="domcontentloaded")
            if response is None:
                print(f"Navigation aborted (likely due to PDF): {url}")
                return -3, url
            return response.status, response.url
        except Exception as e:
            print(f"Error loading {url}: {e}")
            return -3, url  # Custom error code
        finally:
            await page.close()

    def run(self):
        """Starts Playwright in an event loop"""
        asyncio.run(self.run_async())

    async def estimate_remaining_time(self, links, browser=None):
        """Estimate the total run time by timing one request to the first link.

        Parameters
        ----------
        links:
            Non-empty sequence of ``(id, url)`` pairs.
        browser:
            Optional already-running browser to reuse. When omitted, a
            temporary Playwright instance and browser are started and shut
            down again.

        Returns the estimate in seconds (float).
        """
        if browser is not None:
            return len(links) * await self._time_single_request(browser, links[0][1])
        playwright = await async_playwright().start()
        try:
            temp_browser = await playwright.chromium.launch()
            try:
                per_request = await self._time_single_request(
                    temp_browser, links[0][1]
                )
            finally:
                await temp_browser.close()
        finally:
            await playwright.stop()
        return len(links) * per_request

    async def _time_single_request(self, browser, url):
        """Return the seconds one get_page_status() call for ``url`` takes."""
        started = time.monotonic()
        await self.get_page_status(browser, url)
        # Fractional seconds matter: truncating to whole seconds turned every
        # sub-second request into an estimate of 0.
        return time.monotonic() - started

    async def run_async(self):
        """Async version of the run method"""
        self.db = Database("lfer.db")
        try:
            links = self.db.get_links_by_response_code(self.status_code)
            self.total_entries_signal.emit(len(links))
            if not links:
                self.etaSignal.emit("Done")
                return
            playwright = await async_playwright().start()
            try:
                browser = await playwright.chromium.launch()
                try:
                    await self._check_links(browser, links)
                finally:
                    await browser.close()
            finally:
                await playwright.stop()  # Ensure Playwright stops properly
        finally:
            self.db.close()

    async def _check_links(self, browser, links):
        """Check every link with ``browser`` while the ETA countdown runs."""
        # Reuse the running browser instead of launching a second Chromium.
        remaining_time = await self.estimate_remaining_time(links, browser=browser)
        self._start_eta_countdown(remaining_time)
        try:
            for position, (link_id, url) in enumerate(links, start=1):
                if not self.running:
                    break
                started = time.monotonic()
                response_code, destination_link = await self.get_page_status(
                    browser, url
                )
                self.db.update_response_code(link_id, response_code, destination_link)
                self.updateSignal.emit(position)
                self.resultSignal.emit(f"{url} : {response_code}")
                # Correct the ETA by the time this request actually took.
                remaining_time -= time.monotonic() - started
                self.eta_worker.update_remaining_time(remaining_time)
        finally:
            self._stop_eta_countdown()

    def _start_eta_countdown(self, remaining_time):
        """Start the countdown worker on its own thread."""
        self.eta_thread = QtCore.QThread()
        self.eta_worker = CountdownWorker(remaining_time)
        self.eta_worker.moveToThread(self.eta_thread)
        self.eta_worker.etaSignal.connect(
            self.etaSignal.emit, QtCore.Qt.ConnectionType.DirectConnection
        )
        self.eta_thread.started.connect(self.eta_worker.run)
        self.eta_thread.start()

    def _stop_eta_countdown(self):
        """Stop the countdown worker and wait for its thread to finish."""
        self.running = False
        # The worker has its own `running` flag; without stopping it, wait()
        # blocks until the countdown reaches zero by itself.
        if self.eta_worker is not None:
            self.eta_worker.stop()
        if self.eta_thread is not None:
            self.eta_thread.quit()
            self.eta_thread.wait()

    def stop(self):
        """Stops the processing and ETA update."""
        self.running = False

44
src/ui/utils.py Normal file
View File

@@ -0,0 +1,44 @@
from PyQt6.QtWidgets import QProgressBar
from PyQt6.QtCore import pyqtSlot
from PyQt6 import QtCore
import datetime
class QtqdmProgressBar(QProgressBar):
    """Progress bar widget driven by tqdm ``format_dict`` status dictionaries."""

    def __init__(self, parent):
        super().__init__(parent)
        self.setMinimumWidth(300)  # Set minimum width
        self.setMinimumHeight(20)  # Set minimum height
        self.setTextVisible(True)  # Enable text display
        self.setFormat("%p%")  # Set format to show percentage
        self.setAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)

    @pyqtSlot(dict)
    def make_progress(self, status: dict):
        """Update range, value and caption from a tqdm status dictionary.

        Keys read: "initial", "total", "n", "unit", "rate", "elapsed" and
        "prefix". Missing keys and ``None`` values fall back to neutral
        defaults, so a partial dictionary does not raise.
        """
        # tqdm reports None for values it does not know yet (e.g. the rate on
        # the first iteration, or the total of an unsized iterable).
        initial = status.get("initial") or 0
        total = status.get("total") or 0
        iteration = status.get("n") or 0
        unit = status.get("unit") or "it"
        rate = status.get("rate") or 0.0
        time_elapsed = status.get("elapsed") or 0.0
        prefix = status.get("prefix") or ""

        remaining_items = max(total - iteration, 0)
        time_remaining = remaining_items / rate if rate > 0 else 0
        # Whole seconds only: a float would render with microseconds
        # ("0:00:12.345679") in the caption.
        formated_time_remaining = str(datetime.timedelta(seconds=int(time_remaining)))

        self.setMinimum(initial)
        self.setMaximum(total)
        self.setValue(iteration)
        # %v, %m and %p are QProgressBar placeholders (value, maximum, percent).
        self.setFormat(
            f"{prefix} %v of %m (%p%) at {rate:0.4f} {unit}/sec after {time_elapsed:0.2f} sec > {formated_time_remaining}"
        )

104
status_code.py Normal file
View File

@@ -0,0 +1,104 @@
import requests
from src.database import Database
import threading
from ratelimit import limits, sleep_and_retry
import time
import loguru
# Module-wide logger: remove the handlers loguru installs by default and write
# to "status_code.log" instead, rotating the file at 100 MB.
log = loguru.logger
log.remove()
log.add("status_code.log", rotation="100 MB")
# Number of worker threads used by main_threaded().
THREADS = 10
# Thread objects created by main_threaded().
threadlist = []
# NOTE: the database is opened and the link list is loaded at import time.
db = Database("lfer.db")
links = db.get_links()
# Total number of loaded links and the shared progress counter for the workers.
LINKLEN = len(links)
LINKPROGRESS = 0
# (id, response_code, destination_link) tuples collected by the workers.
RESPONSES = []
# URL fragments that are never requested; get_status_code() reports them as -2.
non_support = ["d-nb.info", ".jpg", ".png", ".jpeg"]
@log.catch()
def get_status_code(url):
    """Request ``url`` and return ``(status_code, final_url_or_message)``.

    Special codes: -2 for unsupported targets, -1 for stored error markers,
    0 when the request failed (the message is the exception text).
    """
    for fragment in non_support:
        if fragment in url:
            log.error(f"URL: {url}, ERROR: Site not supported")
            return -2, "Site not supported"

    if "Error" in url:
        log.error(f"URL: {url}, ERROR: No data found")
        return -1, "No data found"

    try:
        request_headers = {
            "User-Agent": "Automated LFER Status Code Checker/1.0 (alexander.kirchner@ph-freiburg.de)",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        reply = requests.get(url, headers=request_headers, timeout=50)
        log.info(f"URL: {url}, Status Code: {reply.status_code}")
        return reply.status_code, reply.url
    except Exception as e:
        # Every failure is reported as code 0 with the error text.
        log.error(f"URL: {url}, Status Code: 0")
        return 0, str(e)
# Guards LINKPROGRESS: `+=` on a shared global is a read-modify-write and can
# lose updates when several worker threads run it at the same time.
_PROGRESS_LOCK = threading.Lock()


def worker(listpart):
    """Check every ``(id, url)`` pair in ``listpart`` and collect the results.

    Results are appended to the module-level ``RESPONSES`` list as
    ``(id, response_code, destination_link)``; the shared progress counter is
    advanced and printed after each link.
    """
    global LINKPROGRESS
    for link_id, url in listpart:
        response_code, destination_link = get_status_code(url)
        RESPONSES.append((link_id, response_code, destination_link))
        with _PROGRESS_LOCK:
            LINKPROGRESS += 1
            done = LINKPROGRESS
        print("Progress: ", done, "/", LINKLEN, end="\r")
def main_threaded():
    """Check all links with THREADS worker threads, then store the results.

    The link list is cut into THREADS consecutive slices of equal size; the
    last slice also takes the remainder. Database writes happen afterwards on
    the calling thread.
    """
    chunk_size = LINKLEN // THREADS
    for index in range(THREADS):
        lower = index * chunk_size
        upper = LINKLEN if index == THREADS - 1 else lower + chunk_size
        threadlist.append(
            threading.Thread(target=worker, args=(links[lower:upper],))
        )

    for running_thread in threadlist:
        running_thread.start()
    for running_thread in threadlist:
        running_thread.join()

    for link_id, response_code, destination_link in RESPONSES:
        db.update_response_code(link_id, response_code, destination_link)
    print("Done")
def main():
    """Check all loaded links one after another, pausing one second between them."""
    for position, (link_id, url) in enumerate(links, start=1):
        response_code, destination_link = get_status_code(url)
        db.update_response_code(link_id, response_code, destination_link)
        print("Progress: ", position, "/", LINKLEN, end="\r")
        time.sleep(1)
    print("Done")
def check_by_status_code(status_code):
    """Re-check every link that is currently stored with *status_code*.

    Args:
        status_code: The stored response code whose links are requested again.

    Each link is requested again; when the new result differs from
    *status_code* the database row is updated. Waits one second per link.
    """
    links = db.get_links_by_response_code(status_code)
    # Progress is relative to the links selected here, not to the module-wide
    # LINKLEN, which counts a different result set.
    total = len(links)
    for position, (link_id, url) in enumerate(links, start=1):
        response_code, destination_link = get_status_code(url)
        # Only a changed result needs to be stored; the previous `==` test
        # discarded exactly the rows whose status had changed.
        if response_code != status_code:
            db.update_response_code(link_id, response_code, destination_link)
        print("Progress: ", position, "/", total, end="\r")
        time.sleep(1)
    print("Done")
if __name__ == "__main__":
    # Default run: sequentially check every link returned by db.get_links()
    # (presumably those still at response code 0 -- confirm in Database).
    main()
    # Alternative run: re-check only links stored with HTTP 429
    # (Too Many Requests).
    # check_by_status_code(429)

63
webscraper.py Normal file
View File

@@ -0,0 +1,63 @@
import requests
from ratelimit import limits, sleep_and_retry
from src.database import Database
import xml.etree.ElementTree as ET
# Module-level database handle; get_data() stores the scraped links through it.
db = Database("lfer.db")
@sleep_and_retry
@limits(calls=10, period=1)
def fetch_data(ppn, timeout=30):
    """Fetch the SRU search response for one PPN from the SWB catalogue.

    Args:
        ppn: The PPN to look up.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Passing the query as `params` lets requests percent-encode the PPN, so
    # stray characters in the input cannot corrupt the URL.
    response = requests.get(
        "https://sru.bsz-bw.de/swb",
        params={
            "version": "1.1",
            "query": f"pica.ppn={ppn}",
            "operation": "searchRetrieve",
            "maximumRecords": "10",
            "recordSchema": "marcxmlk10os",
        },
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=timeout,
    )
    return response.text
def process_response(response):
    """Extract the URLs stored in MARC datafield 856, subfield ``u``.

    Args:
        response: The SRU response as an XML string.

    Returns:
        A list of non-empty URL strings collected from every ``recordData``
        element, or ``None`` if the XML cannot be parsed or no URL is present.
    """
    namespace = {
        "zs": "http://www.loc.gov/zing/srw/",
        "marc": "http://www.loc.gov/MARC21/slim",
    }
    try:
        root = ET.fromstring(response)
    except ET.ParseError:
        return None

    links = []
    # Walk every returned record, not just the first one.
    for record_data in root.iterfind(".//zs:recordData", namespace):
        for datafield in record_data.iterfind(
            ".//marc:datafield[@tag='856']", namespace
        ):
            for subfield in datafield.iterfind(
                "marc:subfield[@code='u']", namespace
            ):
                # An empty element has text None; skip it instead of
                # returning None as if it were a link.
                url = (subfield.text or "").strip()
                if url:
                    links.append(url)
    return links or None
def get_data():
    """Look up every PPN listed in ``ppnlist.txt`` and store its links.

    For each PPN the SRU response is fetched and parsed. Every extracted link
    is written to the database; if none is found, a single
    ``"Error: No data found"`` placeholder row is written instead.
    Progress is printed after each PPN.
    """
    with open("ppnlist.txt", "r") as f:
        # Drop blank lines (e.g. from a trailing newline) so no request or
        # database row is produced for an empty PPN.
        ppns = [line.strip() for line in f if line.strip()]
    total = len(ppns)
    # enumerate() gives the position directly; list.index() rescans the list
    # each time and reports the first match for duplicate PPNs.
    for position, ppn in enumerate(ppns, start=1):
        links = process_response(fetch_data(ppn))
        if links is None:
            db.add_data(ppn, "Error: No data found")
        else:
            for link in links:
                db.add_data(ppn, link)
        print("Progress: ", position, "/", total, end="\r")
# Script entry point: scrape the links for every PPN in ppnlist.txt.
if __name__ == "__main__":
    print("Hello from webscraper!\nScraping the list of PPNs...")
    get_data()
    print("Done")