diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8d6e62c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,4 @@
+from src.ui.interface import launch  # creates the QApplication and the main window
+
+if __name__ == "__main__":  # start the GUI only when run as a script
+ launch()  # returns after the Qt event loop (app.exec()) has ended
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d90bb08
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "linkavailablechecker"
+version = "0.1.0"
+description = "PyQt6 desktop tool that checks the availability of links stored per PPN and charts their status codes"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+ "beautifulsoup4>=4.12.3",
+ "loguru>=0.7.3",
+ "playwright>=1.49.1",
+ "pyqt6-charts>=6.8.0",
+ "pyqt6>=6.8.1",
+ "pyqtgraph>=0.13.7",
+ "ratelimit>=2.2.1",
+ "requests>=2.32.3",
+ "prettytable>=3.14.0",
+ "cloudscraper>=1.2.71",
+ "limit>=0.2.3",
+ "alive-progress>=3.2.0",
+ "tqdm>=4.67.1",
+]
+
+[dependency-groups]
+dev = [
+ "pip>=25.0",
+ "selenium>=4.29.0",
+ "undetected-chromedriver>=3.5.5",
+]
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..97d07c1
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+from .ui.interface import MainWindow
diff --git a/src/database.py b/src/database.py
new file mode 100644
index 0000000..c43ef1f
--- /dev/null
+++ b/src/database.py
@@ -0,0 +1,212 @@
+"""SQLite persistence layer for the link checker."""
+
+import sqlite3
+
+
+class Database:
+    """Wrapper around the ``response`` table of an SQLite database.
+
+    Each row stores a ``ppn``, a ``source_link``, a ``destination_link`` and a
+    ``response_code``.  The code defaults to ``0``; ``reset_status_codes``
+    restores that value and ``get_checked_by_publisher`` counts only rows
+    whose code differs from it.
+
+    All methods share one connection and one cursor; every write is
+    committed immediately.
+    """
+
+    def __init__(self, db_name):
+        """Open (or create) the database *db_name* and make sure the table exists."""
+        self.conn = sqlite3.connect(db_name)
+        self.cursor = self.conn.cursor()
+        self.create_table()
+
+    def __enter__(self):
+        """Support ``with Database(...) as db:``; the connection is closed on exit."""
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    @staticmethod
+    def _like(publisher):
+        """Return the ``LIKE`` pattern for links that contain *publisher*."""
+        return f"%{publisher}%"
+
+    @staticmethod
+    def _publisher_of(link):
+        """Return the host part of *link*, or *link* itself if it is not a URL."""
+        if "http" not in link or "//" not in link:
+            return link
+        return link.split("//")[1].split("/")[0]
+
+    def create_table(self):
+        """Create the ``response`` table if it does not exist yet."""
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS response (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                ppn TEXT NOT NULL,
+                source_link TEXT,
+                destination_link TEXT,
+                response_code INTEGER NOT NULL DEFAULT 0
+            )
+        """)
+        self.conn.commit()
+
+    def add_data(self, ppn, source_link, destination_link=None, response_code=0):
+        """Insert one row and commit it."""
+        self.cursor.execute(
+            "INSERT INTO response (ppn, source_link, destination_link, response_code) "
+            "VALUES (?, ?, ?, ?)",
+            (ppn, source_link, destination_link, response_code),
+        )
+        self.conn.commit()
+
+    def get_data(self, query, args):
+        """Run the parameterised *query* with *args* and return all rows."""
+        self.cursor.execute(query, args)
+        return self.cursor.fetchall()
+
+    def get_links_by_response_code(self, response_code: int):
+        """Return ``(id, source_link)`` for every row with *response_code*."""
+        self.cursor.execute(
+            "SELECT id, source_link FROM response WHERE response_code = ?",
+            (response_code,),
+        )
+        return self.cursor.fetchall()
+
+    def get_links(self):
+        """Return ``(id, source_link)`` for every row whose code is ``0``."""
+        return self.get_links_by_response_code(0)
+
+    def update_response_code(self, id, response_code, destination_link):
+        """Store code and destination link for the row with primary key *id*."""
+        # ``id`` shadows the builtin but is part of the public signature.
+        self.cursor.execute(
+            "UPDATE response SET response_code = ?, destination_link = ? WHERE id = ?",
+            (response_code, destination_link, id),
+        )
+        self.conn.commit()
+
+    def reset_status_codes(self):
+        """Set every code back to ``0`` and clear every destination link."""
+        self.cursor.execute(
+            "UPDATE response SET response_code = 0, destination_link = NULL"
+        )
+        self.conn.commit()
+
+    def get_status_codes(self):
+        """Return the distinct response codes as strings, in numeric order."""
+        # Ordering in SQL keeps e.g. 1000 after 200; sorting the strings
+        # would order them lexicographically.
+        self.cursor.execute(
+            "SELECT DISTINCT response_code FROM response ORDER BY response_code"
+        )
+        return [str(row[0]) for row in self.cursor.fetchall()]
+
+    def close(self):
+        """Close the connection."""
+        self.conn.close()
+
+    def get_status_code_counts(self):
+        """Return ``(response_code, row_count)`` tuples ordered by response code."""
+        self.cursor.execute(
+            "SELECT response_code, COUNT(*) FROM response "
+            "GROUP BY response_code ORDER BY response_code"
+        )
+        return self.cursor.fetchall()
+
+    def get_publishers(self):
+        """Return the sorted, de-duplicated publishers of all source links.
+
+        The publisher of a URL is its host part; other links are returned
+        unchanged.  Rows without a source link (the column is nullable) are
+        skipped instead of raising ``TypeError``.
+        """
+        self.cursor.execute(
+            "SELECT DISTINCT source_link FROM response WHERE source_link IS NOT NULL"
+        )
+        return sorted({self._publisher_of(row[0]) for row in self.cursor.fetchall()})
+
+    def get_results_by_publisher(self, publisher, distinct=False):
+        """Return ``(destination_link, response_code, source_link)`` rows for *publisher*.
+
+        NOTE(review): the flag behaves opposite to its name -- ``distinct=True``
+        returns every matching row, ``distinct=False`` uses ``SELECT DISTINCT``.
+        ``MainWindow.create_graph`` passes ``distinct=True`` when duplicates
+        are wanted and so depends on this; change both sides together.
+        """
+        select = "SELECT" if distinct else "SELECT DISTINCT"
+        self.cursor.execute(
+            f"{select} destination_link, response_code, source_link "
+            "FROM response WHERE source_link LIKE ?",
+            (self._like(publisher),),
+        )
+        return self.cursor.fetchall()
+
+    def get_results_by_publisher_and_status(
+        self, publisher, status_code, distinct=False
+    ):
+        """Return ``(ppn, destination_link, source_link)`` rows matching both arguments.
+
+        With ``distinct=True`` duplicate rows are removed.
+        """
+        select = "SELECT DISTINCT" if distinct else "SELECT"
+        self.cursor.execute(
+            f"{select} ppn, destination_link, source_link "
+            "FROM response WHERE source_link LIKE ? AND response_code = ?",
+            (self._like(publisher), status_code),
+        )
+        return self.cursor.fetchall()
+
+    def get_publisher_count(self, publisher):
+        """Return the number of rows whose source link contains *publisher*."""
+        self.cursor.execute(
+            "SELECT COUNT(*) FROM response WHERE source_link LIKE ?",
+            (self._like(publisher),),
+        )
+        return self.cursor.fetchone()[0]
+
+    def get_checked_by_publisher(self, publisher):
+        """Return how many rows of *publisher* have a non-zero response code."""
+        self.cursor.execute(
+            "SELECT COUNT(*) FROM response "
+            "WHERE source_link LIKE ? AND response_code != 0",
+            (self._like(publisher),),
+        )
+        return self.cursor.fetchone()[0]
+
+    def get_num_of_links_for_status_code(self, statuscode):
+        """Return one ``(publisher, row_count)`` tuple per publisher for *statuscode*."""
+        return [
+            (name, self.get_num_of_links_for_status_code_and_publisher(name, statuscode)[0])
+            for name in self.get_publishers()
+        ]
+
+    def get_num_of_links_for_status_code_and_publisher(self, publisher, statuscode):
+        """Return the one-element row ``(row_count,)`` for *publisher* and *statuscode*.
+
+        Unlike the other counting methods this returns the raw ``fetchone()``
+        tuple; the shape is kept for callers outside this file.
+        """
+        self.cursor.execute(
+            "SELECT COUNT(*) FROM response "
+            "WHERE source_link LIKE ? AND response_code = ?",
+            (self._like(publisher), statuscode),
+        )
+        return self.cursor.fetchone()
+
+    def get_unique_count_ppns(self):
+        """Return the number of distinct PPNs."""
+        self.cursor.execute("SELECT COUNT(DISTINCT ppn) FROM response")
+        return self.cursor.fetchone()[0]
+
+    def get_link_count(self):
+        """Return the total number of rows."""
+        self.cursor.execute("SELECT COUNT(*) FROM response")
+        return self.cursor.fetchone()[0]
+
+
+if __name__ == "__main__":
+    with Database("lfer.db") as db:
+        print(db.get_unique_count_ppns(), db.get_link_count())
diff --git a/src/ui/interface.py b/src/ui/interface.py
new file mode 100644
index 0000000..f7555b8
--- /dev/null
+++ b/src/ui/interface.py
@@ -0,0 +1,406 @@
+"""Main window of the link checker: statistics, checks, charts and exports."""
+
+import csv
+import os
+import sys
+import time
+from collections import Counter
+
+import loguru
+import prettytable
+from PyQt6 import QtCore, QtWidgets
+from PyQt6.QtCharts import QChart, QChartView, QPieSeries, QPieSlice
+from PyQt6.QtCore import pyqtSlot
+from PyQt6.QtGui import QDesktopServices, QPainter
+from PyQt6.QtWidgets import QVBoxLayout
+
+from src.database import Database
+from src.ui.threads import (
+    CheckThread,
+    CheckThreadPlaywright,
+    StatusCodeThread,
+    WebscraperThread,
+)
+from src.ui.utils import QtqdmProgressBar
+
+from .sources.Ui_main_interface import Ui_MainWindow
+
+# File written by the export actions, relative to the working directory.
+EXPORT_PATH = "export.tsv"
+
+log = loguru.logger
+log.remove()
+log.add(sys.stdout, level="INFO")
+
+
+class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
+    """Main window wiring the generated UI to the database and the worker threads."""
+
+    def __init__(self):
+        super().__init__()
+        self.setupUi(self)
+        self.showResults.clicked.connect(self.create_graph)
+        # Show the first page of the ``summary`` stack.
+        self.summary.setCurrentIndex(0)
+        plus_minus = QtWidgets.QAbstractSpinBox.ButtonSymbols.PlusMinus
+        self.spinTimeout.setButtonSymbols(plus_minus)
+        self.spinTimer.setButtonSymbols(plus_minus)
+
+        self.db = Database("lfer.db")
+        self.db_publishers = self.db.get_publishers()
+        self.db_publishers.sort()
+        self.publishers.addItems(self.db_publishers)
+        self.response_data.itemDoubleClicked.connect(self.display_detailed_overview)
+
+        self.status_codes = self.db.get_status_codes()
+        self.progressBar_qtdm = QtqdmProgressBar(self)
+        self.progressBar_qtdm.setStyle(QtWidgets.QStyleFactory.create("Fusion"))
+        pb_font = self.progressBar_qtdm.font()
+        pb_font.setBold(False)
+        self.progressBar_qtdm.setFont(pb_font)
+        self.widget.setLayout(QVBoxLayout())
+        self.widget.layout().addWidget(self.progressBar_qtdm)
+        self.progressBar_qtdm.hide()
+
+        # Status code check.
+        self.check_status_code.addItems(self.status_codes)
+        self.check_code.clicked.connect(self.check_status_code_clicked)
+        self.total = 0
+        self.total_scraper = 0
+        self.check_overview.hide()
+
+        self.statusMessage = QtWidgets.QLabel()
+        self.webscraperMessage = QtWidgets.QLabel()
+        self.statusBar().addPermanentWidget(self.statusMessage)
+        self.statusBar().addPermanentWidget(self.webscraperMessage)
+
+        # References to every started worker thread (presumably kept so the
+        # threads are not garbage collected while they run).
+        self.checkThreads = []
+        self.update_database_stats()
+        self.updatedatabasestats.clicked.connect(self.update_database_stats)
+
+        # Database actions.
+        self.load_ppn.clicked.connect(self.load_ppns)
+        self.resetStatusCodes.clicked.connect(self.reset_status_codes)
+
+        # Status code graph (marked "not working as intended" by the author).
+        self.stats_scroll_area_table.itemDoubleClicked.connect(self.show_graph)
+        self.graph_data = []
+        self.graph_data_total = 0
+        self.status_code = None
+        self.stats_scroll_area_table.setColumnCount(3)
+        # Column 2 starts at 50 px and stretches over the remaining width.
+        self.stats_scroll_area_table.setColumnWidth(2, 50)
+        self.stats_scroll_area_table.horizontalHeader().setSectionResizeMode(
+            2, QtWidgets.QHeaderView.ResizeMode.Stretch
+        )
+
+        # Export.
+        self.exportBtn.clicked.connect(self.export_data_single)
+        self.exportAll.clicked.connect(self.export_data_all)
+        self.exportAll.setEnabled(False)
+
+        self.usePlaywright.clicked.connect(self.toggle_playwright)
+
+    @staticmethod
+    def _fill_table(table, rows):
+        """Replace the contents of *table* with *rows* (one sequence of cells per row)."""
+        table.setRowCount(0)
+        for cells in rows:
+            row = table.rowCount()
+            table.insertRow(row)
+            for column, value in enumerate(cells):
+                table.setItem(row, column, QtWidgets.QTableWidgetItem(str(value)))
+
+    @staticmethod
+    def _write_tsv(rows):
+        """Write *rows* under a ``PPN``/``Link`` header to ``EXPORT_PATH`` and open it."""
+        # ``csv`` quotes cells where needed.  Converting a CSV dump with
+        # ``replace(",", "\t")`` also replaced the commas inside links.
+        with open(EXPORT_PATH, "w", encoding="utf-8", newline="") as handle:
+            writer = csv.writer(handle, delimiter="\t")
+            writer.writerow(["PPN", "Link"])
+            writer.writerows(rows)
+        # Uses the desktop's default application on every platform;
+        # ``os.system("start ...")`` only works on Windows.
+        QDesktopServices.openUrl(
+            QtCore.QUrl.fromLocalFile(os.path.abspath(EXPORT_PATH))
+        )
+
+    def toggle_playwright(self):
+        """Disable ``groupBox_2`` while the Playwright check box is ticked."""
+        self.groupBox_2.setEnabled(not self.usePlaywright.isChecked())
+
+    def export_data_single(self):
+        """Export the rows currently shown in the detailed overview table."""
+        self.export_data(self.detailed_overview)
+
+    def export_data_all(self):
+        """Export PPN and source link of every row with the selected status code."""
+        rows = self.db.get_data(
+            "SELECT ppn, source_link FROM response WHERE response_code = ?",
+            (self.status_code,),
+        )
+        self._write_tsv(rows)
+
+    def export_data(self, tableData):
+        """Export the first two columns of the table widget *tableData*."""
+        # Read from the argument rather than from an attribute of the same name.
+        rows = []
+        for row in range(tableData.rowCount()):
+            cells = (tableData.item(row, 0), tableData.item(row, 1))
+            rows.append(["" if cell is None else cell.text() for cell in cells])
+        self._write_tsv(rows)
+
+    def show_graph(self, pos):
+        """Start a ``StatusCodeThread`` for the double-clicked status code.
+
+        *pos* is the item delivered by ``itemDoubleClicked``; the status code
+        is read from column 0 of its row.
+        """
+        log.debug("show graph")
+        self.exportAll.setEnabled(True)
+        status_code = self.stats_scroll_area_table.item(pos.row(), 0).text()
+        self.status_code = status_code
+        # Start empty, otherwise results of an earlier run would be charted
+        # together with the new ones.
+        self.graph_data = []
+        scthread = StatusCodeThread(status_code=status_code)
+        scthread.progressSignal.connect(self.update_progress_graph)
+        scthread.total_entries_signal.connect(self.set_total_entries_graph)
+        scthread.current_data_signal.connect(self.add_data)
+        scthread.start()
+        self.checkThreads.append(scthread)
+
+    def add_data(self, data):
+        """Keep *data* for the chart unless its second element is zero."""
+        if data[1] != 0:
+            self.graph_data.append(data)
+
+    def set_total_entries_graph(self, total):
+        """Store the total that ``update_progress_graph`` compares against."""
+        self.graph_data_total = total
+
+    def update_progress_graph(self, progress):
+        """Show the graph thread's progress and draw the chart once it is complete."""
+        if progress != self.graph_data_total:
+            self.statusMessage.setText(f"Progress: {progress}/{self.graph_data_total}")
+            return
+        # No ``time.sleep`` here: sleeping inside a slot of the window blocks
+        # the Qt event loop.  The "Done" message disappears after its timeout.
+        self.statusMessage.setText("")
+        self.statusBar().showMessage("Done", 2000)
+        self.summary.setCurrentIndex(0)
+        self.graph_data.sort(key=lambda entry: entry[1], reverse=True)
+        self.create_pie_chart(
+            self.graph_data, f"Data for Status Code {self.status_code}"
+        )
+        self.add_table_data(self.graph_data)
+
+    def reset_status_codes(self):
+        """Reset all stored status codes after the user has confirmed."""
+        dialog = QtWidgets.QMessageBox()
+        dialog.setWindowTitle("Reset Status Codes")
+        dialog.setText("Do you really want to reset all status codes?")
+        dialog.setStandardButtons(
+            QtWidgets.QMessageBox.StandardButton.Yes
+            | QtWidgets.QMessageBox.StandardButton.No
+        )
+        dialog.setDefaultButton(QtWidgets.QMessageBox.StandardButton.No)
+        if dialog.exec() == QtWidgets.QMessageBox.StandardButton.Yes:
+            self.db.reset_status_codes()
+            self.update_database_stats()
+
+    def load_ppns(self):
+        """Let the user pick text files and start a ``WebscraperThread`` per file."""
+        filedialog = QtWidgets.QFileDialog()
+        filedialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFiles)
+        filedialog.setNameFilter("Text files (*.txt)")
+        if not filedialog.exec():
+            return
+        for file in filedialog.selectedFiles():
+            scrapeThread = WebscraperThread(ppnfilePath=file)
+            scrapeThread.updateSignal.connect(self.update_progress_scraper)
+            scrapeThread.total_entries_signal.connect(self.set_total_entries_scraper)
+            scrapeThread.start()
+            self.checkThreads.append(scrapeThread)
+        # Refresh the status codes offered in the dropdown.
+        # NOTE(review): this runs right after the threads were started, so it
+        # may not see the rows they add later -- confirm the intended timing.
+        self.check_status_code.clear()
+        self.check_status_code.addItems(self.db.get_status_codes())
+
+    def update_progress_scraper(self, progress):
+        """Show the scraper progress and refresh the database statistics."""
+        self.webscraperMessage.setText(f"Progress: {progress}/{self.total_scraper}")
+        self.update_database_stats()
+
+    def set_total_entries_scraper(self, total):
+        """Store the total shown by ``update_progress_scraper``."""
+        self.total_scraper = total
+
+    def update_database_stats(self):
+        """Refresh the summary label and the status code table from the database."""
+        stats = self.db.get_status_code_counts()
+        total_entries = sum(count for _, count in stats)
+        self.databasestats.setTextFormat(QtCore.Qt.TextFormat.RichText)
+        self.databasestats.setText(
+            "Es wurden {} Links für {} Medien gefunden".format(
+                self.db.get_link_count(), self.db.get_unique_count_ppns()
+            )
+        )
+        # One row per status code: code, number of rows, share in percent.
+        self._fill_table(
+            self.stats_scroll_area_table,
+            [
+                (code, count, f"{round(count / total_entries * 100, 2)}%")
+                for code, count in stats
+            ],
+        )
+
+    def update_eta(self, eta):
+        """Do nothing; the ETA display is currently disabled."""
+
+    @pyqtSlot()
+    def check_status_code_clicked(self):
+        """Start a check thread for the status code selected in the dropdown."""
+        self.progressBar_qtdm.setValue(0)
+        self.progressBar_qtdm.show()
+        self.check_overview.setPlainText("")
+        status_code = str(self.check_status_code.currentText())
+
+        if self.usePlaywright.isChecked():
+            log.info("Using Playwright")
+            checkThread = CheckThreadPlaywright(status_code=status_code)
+        else:
+            log.info("Using Requests")
+            checkThread = CheckThread(status_code=status_code)
+
+        checkThread.setTimes(self.spinTimeout.value(), self.spinTimer.value())
+        self.statusBar().showMessage("Checking status code", 2000)
+
+        checkThread.updateSignal.connect(self.update_progress)
+        checkThread.total_entries_signal.connect(self.set_total_entries)
+        checkThread.resultSignal.connect(self.showMessage)
+        checkThread.progress.connect(self.progressBar_qtdm.make_progress)
+        checkThread.finished.connect(self.update_done)
+        # Re-enabled in ``update_done`` once the thread has finished.
+        self.check_code.setEnabled(False)
+        self.check_overview.show()
+        checkThread.start()
+        self.checkThreads.append(checkThread)
+
+    def update_done(self):
+        """Re-enable the check button and hide the progress bar after a check."""
+        self.check_code.setEnabled(True)
+        self.progressBar_qtdm.hide()
+        self.progressBar_qtdm.setValue(0)
+        self.statusBar().showMessage("Done", 2000)
+
+    def set_total_entries(self, total):
+        """Store *total* and use it as the maximum of the progress bar."""
+        self.total = total
+        self.progressBar_qtdm.show()
+        self.progressBar_qtdm.setMaximum(total)
+
+    def showMessage(self, message):
+        """Append *message* to the check log and keep the last line visible."""
+        # Start over once the log has grown beyond 100 line breaks.
+        if self.check_overview.toPlainText().count("\n") > 100:
+            self.check_overview.setPlainText("")
+        self.check_overview.append(message)
+        scroll_bar = self.check_overview.verticalScrollBar()
+        scroll_bar.setValue(scroll_bar.maximum())
+
+    def update_progress(self):
+        """Refresh the database statistics."""
+        self.update_database_stats()
+
+    def create_graph(self):
+        """Chart and tabulate the status codes of the selected publisher."""
+        self.summary.setCurrentIndex(0)
+        publisher = self.publishers.currentText()
+        # ``Database.get_results_by_publisher`` returns all rows, duplicates
+        # included, for ``distinct=True`` and de-duplicated rows otherwise.
+        data = self.db.get_results_by_publisher(
+            publisher, distinct=self.addDuplicates.isChecked()
+        )
+        self.total_results_publisher.setText(
+            "Anzahl Titel: {}, davon geprüft: {}".format(
+                self.db.get_publisher_count(publisher),
+                self.db.get_checked_by_publisher(publisher),
+            )
+        )
+        # Number of result rows per response code, in order of first appearance.
+        table_data = list(Counter(row[1] for row in data).items())
+        self.create_pie_chart(
+            table_data, title=f"Statistik der Status Codes für {publisher}"
+        )
+        self.add_table_data(table_data)
+
+    def add_table_data(self, data):
+        """Show the first two elements of every entry of *data* in ``response_data``."""
+        self._fill_table(self.response_data, [(entry[0], entry[1]) for entry in data])
+
+    def create_pie_chart(self, data, title="Count of Status Codes"):
+        """Replace the chart in ``graph_frame`` with a pie chart of *data*.
+
+        *data* holds ``(label, value)`` entries; every slice label is extended
+        by the slice's share in percent.
+        """
+        layout = self.graph_frame.layout()
+        # Detach and delete whatever was shown before.
+        while layout.count():
+            previous = layout.takeAt(0).widget()
+            if previous is not None:
+                previous.deleteLater()
+
+        series = QPieSeries()
+        for entry in data:
+            series.append(str(entry[0]), entry[1])
+
+        chart = QChart()
+        chart.addSeries(series)
+        chart.setAnimationOptions(QChart.AnimationOption.SeriesAnimations)
+        chart.legend().setVisible(True)
+        chart.legend().setAlignment(QtCore.Qt.AlignmentFlag.AlignRight)
+        chart.setTitle(title)
+
+        chartview = QChartView(chart)
+        chartview.setRenderHint(QPainter.RenderHint.Antialiasing)
+        layout.addWidget(chartview)
+
+        for pie_slice in series.slices():
+            share = round(pie_slice.percentage() * 100, 2)
+            pie_slice.setLabel(f"{pie_slice.label()} {share}%")
+
+    def display_detailed_overview(self, item):
+        """Fill the detail table for the double-clicked row of ``response_data``.
+
+        A numeric first column is taken as a status code for the selected
+        publisher; anything else is taken as a publisher for the status code
+        chosen last in ``show_graph``.
+        """
+        self.summary.setCurrentIndex(1)
+        selected = self.response_data.item(item.row(), 0).text()
+        if selected.isnumeric():
+            publisher, status_code = self.publishers.currentText(), selected
+        else:
+            publisher, status_code = selected, self.status_code
+        results = self.db.get_results_by_publisher_and_status(publisher, status_code)
+        self._fill_table(
+            self.detailed_overview,
+            [(result[0], result[1], result[2]) for result in results],
+        )
+
+
+def launch():
+    """Run the application and return the exit code of the Qt event loop."""
+    app = QtWidgets.QApplication([])
+    window = MainWindow()
+    window.show()
+    exit_code = app.exec()
+    # The event loop has ended, so no slot can touch the database any more.
+    window.db.close()
+    return exit_code
diff --git a/src/ui/sources/Ui_main_interface.py b/src/ui/sources/Ui_main_interface.py
new file mode 100644
index 0000000..08044d1
--- /dev/null
+++ b/src/ui/sources/Ui_main_interface.py
@@ -0,0 +1,340 @@
+# Form implementation generated from reading ui file 'c:\Users\aky547\GitHub\LinkAvailableChecker\src\ui\sources\main_interface.ui'
+#
+# Created by: PyQt6 UI code generator 6.7.1
+#
+# WARNING: Any manual changes made to this file will be lost when pyuic6 is
+# run again. Do not edit this file unless you know what you are doing.
+
+
+from PyQt6 import QtCore, QtGui, QtWidgets
+
+
+class Ui_MainWindow(object):
+ def setupUi(self, MainWindow):
+ MainWindow.setObjectName("MainWindow")
+ MainWindow.resize(824, 740)
+ self.centralwidget = QtWidgets.QWidget(parent=MainWindow)
+ self.centralwidget.setObjectName("centralwidget")
+ self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
+ self.verticalLayout.setObjectName("verticalLayout")
+ self.horizontalLayout_4 = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_4.setObjectName("horizontalLayout_4")
+ self.database_stats = QtWidgets.QGroupBox(parent=self.centralwidget)
+ font = QtGui.QFont()
+ font.setBold(True)
+ self.database_stats.setFont(font)
+ self.database_stats.setObjectName("database_stats")
+ self.verticalLayout_6 = QtWidgets.QVBoxLayout(self.database_stats)
+ self.verticalLayout_6.setObjectName("verticalLayout_6")
+ self.horizontalLayout_6 = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_6.setObjectName("horizontalLayout_6")
+ self.load_ppn = QtWidgets.QPushButton(parent=self.database_stats)
+ sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Maximum, QtWidgets.QSizePolicy.Policy.Fixed)
+ sizePolicy.setHorizontalStretch(0)
+ sizePolicy.setVerticalStretch(0)
+ sizePolicy.setHeightForWidth(self.load_ppn.sizePolicy().hasHeightForWidth())
+ self.load_ppn.setSizePolicy(sizePolicy)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.load_ppn.setFont(font)
+ self.load_ppn.setObjectName("load_ppn")
+ self.horizontalLayout_6.addWidget(self.load_ppn)
+ self.resetStatusCodes = QtWidgets.QPushButton(parent=self.database_stats)
+ sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Maximum, QtWidgets.QSizePolicy.Policy.Fixed)
+ sizePolicy.setHorizontalStretch(0)
+ sizePolicy.setVerticalStretch(0)
+ sizePolicy.setHeightForWidth(self.resetStatusCodes.sizePolicy().hasHeightForWidth())
+ self.resetStatusCodes.setSizePolicy(sizePolicy)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.resetStatusCodes.setFont(font)
+ self.resetStatusCodes.setObjectName("resetStatusCodes")
+ self.horizontalLayout_6.addWidget(self.resetStatusCodes)
+ self.verticalLayout_6.addLayout(self.horizontalLayout_6)
+ self.databasestats = QtWidgets.QLabel(parent=self.database_stats)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.databasestats.setFont(font)
+ self.databasestats.setText("")
+ self.databasestats.setTextFormat(QtCore.Qt.TextFormat.PlainText)
+ self.databasestats.setObjectName("databasestats")
+ self.verticalLayout_6.addWidget(self.databasestats)
+ self.stats_scroll_area_table = QtWidgets.QTableWidget(parent=self.database_stats)
+ self.stats_scroll_area_table.setMinimumSize(QtCore.QSize(0, 100))
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.stats_scroll_area_table.setFont(font)
+ self.stats_scroll_area_table.setFrameShape(QtWidgets.QFrame.Shape.NoFrame)
+ self.stats_scroll_area_table.setVerticalScrollBarPolicy(QtCore.Qt.ScrollBarPolicy.ScrollBarAsNeeded)
+ self.stats_scroll_area_table.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarPolicy.ScrollBarAsNeeded)
+ self.stats_scroll_area_table.setEditTriggers(QtWidgets.QAbstractItemView.EditTrigger.NoEditTriggers)
+ self.stats_scroll_area_table.setAlternatingRowColors(True)
+ self.stats_scroll_area_table.setObjectName("stats_scroll_area_table")
+ self.stats_scroll_area_table.setColumnCount(3)
+ self.stats_scroll_area_table.setRowCount(0)
+ item = QtWidgets.QTableWidgetItem()
+ self.stats_scroll_area_table.setHorizontalHeaderItem(0, item)
+ item = QtWidgets.QTableWidgetItem()
+ self.stats_scroll_area_table.setHorizontalHeaderItem(1, item)
+ item = QtWidgets.QTableWidgetItem()
+ self.stats_scroll_area_table.setHorizontalHeaderItem(2, item)
+ self.stats_scroll_area_table.horizontalHeader().setDefaultSectionSize(90)
+ self.stats_scroll_area_table.horizontalHeader().setMinimumSectionSize(50)
+ self.stats_scroll_area_table.horizontalHeader().setStretchLastSection(True)
+ self.stats_scroll_area_table.verticalHeader().setDefaultSectionSize(35)
+ self.verticalLayout_6.addWidget(self.stats_scroll_area_table)
+ self.updatedatabasestats = QtWidgets.QPushButton(parent=self.database_stats)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.updatedatabasestats.setFont(font)
+ self.updatedatabasestats.setObjectName("updatedatabasestats")
+ self.verticalLayout_6.addWidget(self.updatedatabasestats)
+ self.horizontalLayout_4.addWidget(self.database_stats)
+ self.verticalLayout_8 = QtWidgets.QVBoxLayout()
+ self.verticalLayout_8.setObjectName("verticalLayout_8")
+ self.verticalLayout_7 = QtWidgets.QVBoxLayout()
+ self.verticalLayout_7.setSizeConstraint(QtWidgets.QLayout.SizeConstraint.SetDefaultConstraint)
+ self.verticalLayout_7.setObjectName("verticalLayout_7")
+ self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_3.setObjectName("horizontalLayout_3")
+ self.label = QtWidgets.QLabel(parent=self.centralwidget)
+ self.label.setObjectName("label")
+ self.horizontalLayout_3.addWidget(self.label)
+ self.publishers = QtWidgets.QComboBox(parent=self.centralwidget)
+ self.publishers.setEditable(True)
+ self.publishers.setObjectName("publishers")
+ self.horizontalLayout_3.addWidget(self.publishers)
+ self.horizontalLayout_3.setStretch(0, 1)
+ self.horizontalLayout_3.setStretch(1, 5)
+ self.verticalLayout_7.addLayout(self.horizontalLayout_3)
+ self.horizontalLayout_5 = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_5.setObjectName("horizontalLayout_5")
+ spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Policy.Expanding, QtWidgets.QSizePolicy.Policy.Minimum)
+ self.horizontalLayout_5.addItem(spacerItem)
+ self.addDuplicates = QtWidgets.QCheckBox(parent=self.centralwidget)
+ self.addDuplicates.setLayoutDirection(QtCore.Qt.LayoutDirection.LeftToRight)
+ self.addDuplicates.setChecked(True)
+ self.addDuplicates.setTristate(False)
+ self.addDuplicates.setObjectName("addDuplicates")
+ self.horizontalLayout_5.addWidget(self.addDuplicates)
+ self.verticalLayout_7.addLayout(self.horizontalLayout_5)
+ self.showResults = QtWidgets.QPushButton(parent=self.centralwidget)
+ self.showResults.setObjectName("showResults")
+ self.verticalLayout_7.addWidget(self.showResults)
+ self.groupBox = QtWidgets.QGroupBox(parent=self.centralwidget)
+ font = QtGui.QFont()
+ font.setBold(True)
+ self.groupBox.setFont(font)
+ self.groupBox.setObjectName("groupBox")
+ self.formLayout = QtWidgets.QFormLayout(self.groupBox)
+ self.formLayout.setObjectName("formLayout")
+ self.label_3 = QtWidgets.QLabel(parent=self.groupBox)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.label_3.setFont(font)
+ self.label_3.setObjectName("label_3")
+ self.formLayout.setWidget(0, QtWidgets.QFormLayout.ItemRole.LabelRole, self.label_3)
+ self.check_status_code = QtWidgets.QComboBox(parent=self.groupBox)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.check_status_code.setFont(font)
+ self.check_status_code.setObjectName("check_status_code")
+ self.formLayout.setWidget(0, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_status_code)
+ self.usePlaywright = QtWidgets.QCheckBox(parent=self.groupBox)
+ self.usePlaywright.setObjectName("usePlaywright")
+ self.formLayout.setWidget(1, QtWidgets.QFormLayout.ItemRole.LabelRole, self.usePlaywright)
+ self.check_code = QtWidgets.QPushButton(parent=self.groupBox)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.check_code.setFont(font)
+ self.check_code.setObjectName("check_code")
+ self.formLayout.setWidget(1, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_code)
+ self.groupBox_2 = QtWidgets.QGroupBox(parent=self.groupBox)
+ self.groupBox_2.setObjectName("groupBox_2")
+ self.verticalLayout_9 = QtWidgets.QVBoxLayout(self.groupBox_2)
+ self.verticalLayout_9.setObjectName("verticalLayout_9")
+ self.label_4 = QtWidgets.QLabel(parent=self.groupBox_2)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.label_4.setFont(font)
+ self.label_4.setObjectName("label_4")
+ self.verticalLayout_9.addWidget(self.label_4)
+ self.spinTimer = QtWidgets.QSpinBox(parent=self.groupBox_2)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.spinTimer.setFont(font)
+ self.spinTimer.setButtonSymbols(QtWidgets.QAbstractSpinBox.ButtonSymbols.UpDownArrows)
+ self.spinTimer.setMinimum(1)
+ self.spinTimer.setMaximum(60)
+ self.spinTimer.setObjectName("spinTimer")
+ self.verticalLayout_9.addWidget(self.spinTimer)
+ self.label_5 = QtWidgets.QLabel(parent=self.groupBox_2)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.label_5.setFont(font)
+ self.label_5.setObjectName("label_5")
+ self.verticalLayout_9.addWidget(self.label_5)
+ self.spinTimeout = QtWidgets.QSpinBox(parent=self.groupBox_2)
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.spinTimeout.setFont(font)
+ self.spinTimeout.setButtonSymbols(QtWidgets.QAbstractSpinBox.ButtonSymbols.UpDownArrows)
+ self.spinTimeout.setMinimum(5)
+ self.spinTimeout.setMaximum(100)
+ self.spinTimeout.setObjectName("spinTimeout")
+ self.verticalLayout_9.addWidget(self.spinTimeout)
+ spacerItem1 = QtWidgets.QSpacerItem(20, 40, QtWidgets.QSizePolicy.Policy.Minimum, QtWidgets.QSizePolicy.Policy.Expanding)
+ self.verticalLayout_9.addItem(spacerItem1)
+ self.formLayout.setWidget(2, QtWidgets.QFormLayout.ItemRole.LabelRole, self.groupBox_2)
+ self.check_overview = QtWidgets.QTextBrowser(parent=self.groupBox)
+ sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Policy.Expanding, QtWidgets.QSizePolicy.Policy.Maximum)
+ sizePolicy.setHorizontalStretch(0)
+ sizePolicy.setVerticalStretch(0)
+ sizePolicy.setHeightForWidth(self.check_overview.sizePolicy().hasHeightForWidth())
+ self.check_overview.setSizePolicy(sizePolicy)
+ self.check_overview.setMinimumSize(QtCore.QSize(0, 100))
+ self.check_overview.setMaximumSize(QtCore.QSize(16777215, 130))
+ font = QtGui.QFont()
+ font.setBold(False)
+ self.check_overview.setFont(font)
+ self.check_overview.setOpenExternalLinks(True)
+ self.check_overview.setObjectName("check_overview")
+ self.formLayout.setWidget(2, QtWidgets.QFormLayout.ItemRole.FieldRole, self.check_overview)
+ self.horizontalLayout_7 = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_7.setObjectName("horizontalLayout_7")
+ self.widget = QtWidgets.QWidget(parent=self.groupBox)
+ self.widget.setMinimumSize(QtCore.QSize(0, 30))
+ self.widget.setObjectName("widget")
+ self.horizontalLayout_7.addWidget(self.widget)
+ self.formLayout.setLayout(3, QtWidgets.QFormLayout.ItemRole.FieldRole, self.horizontalLayout_7)
+ self.verticalLayout_7.addWidget(self.groupBox)
+ self.verticalLayout_8.addLayout(self.verticalLayout_7)
+ self.horizontalLayout_4.addLayout(self.verticalLayout_8)
+ self.horizontalLayout_4.setStretch(0, 1)
+ self.horizontalLayout_4.setStretch(1, 2)
+ self.verticalLayout.addLayout(self.horizontalLayout_4)
+ self.widget1 = QtWidgets.QWidget(parent=self.centralwidget)
+ self.widget1.setObjectName("widget1")
+ self.horizontalLayout = QtWidgets.QHBoxLayout(self.widget1)
+ self.horizontalLayout.setObjectName("horizontalLayout")
+ self.graph_frame = QtWidgets.QFrame(parent=self.widget1)
+ self.graph_frame.setFrameShape(QtWidgets.QFrame.Shape.StyledPanel)
+ self.graph_frame.setFrameShadow(QtWidgets.QFrame.Shadow.Raised)
+ self.graph_frame.setObjectName("graph_frame")
+ self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.graph_frame)
+ self.verticalLayout_2.setContentsMargins(2, 2, 2, 2)
+ self.verticalLayout_2.setSpacing(0)
+ self.verticalLayout_2.setObjectName("verticalLayout_2")
+ self.horizontalLayout.addWidget(self.graph_frame)
+ self.chart_frame = QtWidgets.QFrame(parent=self.widget1)
+ self.chart_frame.setFrameShape(QtWidgets.QFrame.Shape.StyledPanel)
+ self.chart_frame.setFrameShadow(QtWidgets.QFrame.Shadow.Raised)
+ self.chart_frame.setObjectName("chart_frame")
+ self.verticalLayout_3 = QtWidgets.QVBoxLayout(self.chart_frame)
+ self.verticalLayout_3.setContentsMargins(2, 2, 2, 2)
+ self.verticalLayout_3.setSpacing(0)
+ self.verticalLayout_3.setObjectName("verticalLayout_3")
+ self.summary = QtWidgets.QStackedWidget(parent=self.chart_frame)
+ self.summary.setObjectName("summary")
+ self.page = QtWidgets.QWidget()
+ self.page.setObjectName("page")
+ self.verticalLayout_5 = QtWidgets.QVBoxLayout(self.page)
+ self.verticalLayout_5.setObjectName("verticalLayout_5")
+ self.response_data = QtWidgets.QTableWidget(parent=self.page)
+ self.response_data.setEditTriggers(QtWidgets.QAbstractItemView.EditTrigger.NoEditTriggers)
+ self.response_data.setAlternatingRowColors(True)
+ self.response_data.setObjectName("response_data")
+ self.response_data.setColumnCount(2)
+ self.response_data.setRowCount(0)
+ item = QtWidgets.QTableWidgetItem()
+ self.response_data.setHorizontalHeaderItem(0, item)
+ item = QtWidgets.QTableWidgetItem()
+ self.response_data.setHorizontalHeaderItem(1, item)
+ self.verticalLayout_5.addWidget(self.response_data)
+ self.total_results_publisher = QtWidgets.QLabel(parent=self.page)
+ self.total_results_publisher.setText("")
+ self.total_results_publisher.setObjectName("total_results_publisher")
+ self.verticalLayout_5.addWidget(self.total_results_publisher)
+ self.exportAll = QtWidgets.QPushButton(parent=self.page)
+ self.exportAll.setObjectName("exportAll")
+ self.verticalLayout_5.addWidget(self.exportAll)
+ self.summary.addWidget(self.page)
+ self.page_2 = QtWidgets.QWidget()
+ self.page_2.setObjectName("page_2")
+ self.verticalLayout_4 = QtWidgets.QVBoxLayout(self.page_2)
+ self.verticalLayout_4.setObjectName("verticalLayout_4")
+ self.detailed_overview = QtWidgets.QTableWidget(parent=self.page_2)
+ self.detailed_overview.setObjectName("detailed_overview")
+ self.detailed_overview.setColumnCount(3)
+ self.detailed_overview.setRowCount(0)
+ item = QtWidgets.QTableWidgetItem()
+ self.detailed_overview.setHorizontalHeaderItem(0, item)
+ item = QtWidgets.QTableWidgetItem()
+ self.detailed_overview.setHorizontalHeaderItem(1, item)
+ item = QtWidgets.QTableWidgetItem()
+ self.detailed_overview.setHorizontalHeaderItem(2, item)
+ self.verticalLayout_4.addWidget(self.detailed_overview)
+ self.exportBtn = QtWidgets.QPushButton(parent=self.page_2)
+ self.exportBtn.setObjectName("exportBtn")
+ self.verticalLayout_4.addWidget(self.exportBtn)
+ self.summary.addWidget(self.page_2)
+ self.verticalLayout_3.addWidget(self.summary)
+ self.horizontalLayout.addWidget(self.chart_frame)
+ self.verticalLayout.addWidget(self.widget1)
+ self.verticalLayout.setStretch(0, 1)
+ self.verticalLayout.setStretch(1, 4)
+ MainWindow.setCentralWidget(self.centralwidget)
+ self.statusbar = QtWidgets.QStatusBar(parent=MainWindow)
+ self.statusbar.setObjectName("statusbar")
+ MainWindow.setStatusBar(self.statusbar)
+ self.actionPPNs_laden = QtGui.QAction(parent=MainWindow)
+ self.actionPPNs_laden.setObjectName("actionPPNs_laden")
+ self.actionStatus_Code_zur_cksetzen = QtGui.QAction(parent=MainWindow)
+ self.actionStatus_Code_zur_cksetzen.setObjectName("actionStatus_Code_zur_cksetzen")
+
+ self.retranslateUi(MainWindow)
+ self.summary.setCurrentIndex(0)
+ QtCore.QMetaObject.connectSlotsByName(MainWindow)
+
+ def retranslateUi(self, MainWindow):
+ """Set every user-visible text of the main window, routed through Qt's translation lookup (context "MainWindow")."""
+ # NOTE(review): this looks like pyuic-generated code; if so, edit the .ui file instead of this method.
+ _translate = QtCore.QCoreApplication.translate
+ MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
+ self.database_stats.setTitle(_translate("MainWindow", "Statistik der Datenbank"))
+ self.load_ppn.setText(_translate("MainWindow", "PPNs laden"))
+ self.resetStatusCodes.setText(_translate("MainWindow", "Status Codes löschen"))
+ # Column headers of the database statistics table.
+ item = self.stats_scroll_area_table.horizontalHeaderItem(0)
+ item.setText(_translate("MainWindow", "Status Code"))
+ item = self.stats_scroll_area_table.horizontalHeaderItem(1)
+ item.setText(_translate("MainWindow", "Anzahl Treffer"))
+ item = self.stats_scroll_area_table.horizontalHeaderItem(2)
+ item.setText(_translate("MainWindow", "%"))
+ self.updatedatabasestats.setText(_translate("MainWindow", "Aktualisieren"))
+ self.label.setText(_translate("MainWindow", "Verlag"))
+ self.addDuplicates.setText(_translate("MainWindow", "Dubletten entfernen"))
+ self.showResults.setText(_translate("MainWindow", "Daten anzeigen"))
+ self.groupBox.setTitle(_translate("MainWindow", "Statuscode(s) prüfen"))
+ self.label_3.setText(_translate("MainWindow", "Status Code"))
+ self.usePlaywright.setToolTip(_translate("MainWindow", "Verwendet einen emulierten Browser, um die Seiten zu prüfen, dauert länger, kein ETA berechenbar"))
+ self.usePlaywright.setText(_translate("MainWindow", "Browser\n"
+"verwenden"))
+ self.check_code.setText(_translate("MainWindow", "Prüfen"))
+ self.groupBox_2.setTitle(_translate("MainWindow", "Timers"))
+ self.label_4.setToolTip(_translate("MainWindow", "Pause zwischen den Anfragen"))
+ self.label_4.setText(_translate("MainWindow", "Sleep Timer"))
+ self.label_5.setToolTip(_translate("MainWindow", "Limit, ab wann eine Anfrage als ungültig gewertet wird"))
+ self.label_5.setText(_translate("MainWindow", "Timeout"))
+ self.response_data.setSortingEnabled(True)
+ # Column headers of the per-response-code summary table.
+ item = self.response_data.horizontalHeaderItem(0)
+ item.setText(_translate("MainWindow", "Response Code"))
+ item = self.response_data.horizontalHeaderItem(1)
+ item.setText(_translate("MainWindow", "Anzahl Treffer"))
+ self.exportAll.setText(_translate("MainWindow", "Alles exportieren"))
+ # Column headers of the detailed result table.
+ item = self.detailed_overview.horizontalHeaderItem(0)
+ item.setText(_translate("MainWindow", "PPN"))
+ item = self.detailed_overview.horizontalHeaderItem(1)
+ item.setText(_translate("MainWindow", "Unser Link"))
+ item = self.detailed_overview.horizontalHeaderItem(2)
+ item.setText(_translate("MainWindow", "Ergebnis"))
+ self.exportBtn.setText(_translate("MainWindow", "Daten exportieren"))
+ self.actionPPNs_laden.setText(_translate("MainWindow", "PPNs laden"))
+ self.actionStatus_Code_zur_cksetzen.setText(_translate("MainWindow", "Status Code zurücksetzen"))
diff --git a/src/ui/sources/main_interface.ui b/src/ui/sources/main_interface.ui
new file mode 100644
index 0000000..416fcf3
--- /dev/null
+++ b/src/ui/sources/main_interface.ui
@@ -0,0 +1,573 @@
+
+
+ MainWindow
+
+
+
+ 0
+ 0
+ 824
+ 740
+
+
+
+ MainWindow
+
+
+
+ -
+
+
-
+
+
+
+ true
+
+
+
+ Statistik der Datenbank
+
+
+
-
+
+
-
+
+
+
+ 0
+ 0
+
+
+
+
+ false
+
+
+
+ PPNs laden
+
+
+
+ -
+
+
+
+ 0
+ 0
+
+
+
+
+ false
+
+
+
+ Status Codes löschen
+
+
+
+
+
+ -
+
+
+
+ false
+
+
+
+
+
+
+ Qt::PlainText
+
+
+
+ -
+
+
+
+ 0
+ 100
+
+
+
+
+ false
+
+
+
+ QFrame::NoFrame
+
+
+ Qt::ScrollBarAsNeeded
+
+
+ Qt::ScrollBarAsNeeded
+
+
+ QAbstractItemView::NoEditTriggers
+
+
+ true
+
+
+ 50
+
+
+ 90
+
+
+ true
+
+
+ 35
+
+
+
+ Status Code
+
+
+
+
+ Anzahl Treffer
+
+
+
+
+ %
+
+
+
+
+ -
+
+
+
+ false
+
+
+
+ Aktualisieren
+
+
+
+
+
+
+ -
+
+
-
+
+
+ QLayout::SetDefaultConstraint
+
+
-
+
+
-
+
+
+ Verlag
+
+
+
+ -
+
+
+ true
+
+
+
+
+
+ -
+
+
-
+
+
+ Qt::Horizontal
+
+
+
+ 40
+ 20
+
+
+
+
+ -
+
+
+ Qt::LeftToRight
+
+
+ Dubletten entfernen
+
+
+ true
+
+
+ false
+
+
+
+
+
+ -
+
+
+ Daten anzeigen
+
+
+
+ -
+
+
+
+ true
+
+
+
+ Statuscode prüfen
+
+
+
-
+
+
+
+ false
+
+
+
+ Status Code
+
+
+
+ -
+
+
+
+ false
+
+
+
+
+ -
+
+
+ Verwendet einen emulierten Browser, um die Seiten zu prüfen, dauert länger, kein ETA berechenbar
+
+
+ Browser
+verwenden
+
+
+
+ -
+
+
+
+ false
+
+
+
+ Prüfen
+
+
+
+ -
+
+
+ Timers
+
+
+
-
+
+
+
+ false
+
+
+
+ Pause zwischen den Anfragen
+
+
+ Sleep Timer
+
+
+
+ -
+
+
+
+ false
+
+
+
+ QAbstractSpinBox::UpDownArrows
+
+
+ 1
+
+
+ 60
+
+
+
+ -
+
+
+
+ false
+
+
+
+ Limit, ab wann eine Anfrage als ungültig gewertet wird
+
+
+ Timeout
+
+
+
+ -
+
+
+
+ false
+
+
+
+ QAbstractSpinBox::UpDownArrows
+
+
+ 5
+
+
+ 100
+
+
+
+ -
+
+
+ Qt::Vertical
+
+
+
+ 20
+ 40
+
+
+
+
+
+
+
+ -
+
+
+
+ 0
+ 0
+
+
+
+
+ 0
+ 100
+
+
+
+
+ 16777215
+ 130
+
+
+
+
+ false
+
+
+
+ true
+
+
+
+ -
+
+
-
+
+
+
+ 0
+ 30
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
-
+
+
+ QFrame::StyledPanel
+
+
+ QFrame::Raised
+
+
+
+ 0
+
+
+ 2
+
+
+ 2
+
+
+ 2
+
+
+ 2
+
+
+
+
+ -
+
+
+ QFrame::StyledPanel
+
+
+ QFrame::Raised
+
+
+
+ 0
+
+
+ 2
+
+
+ 2
+
+
+ 2
+
+
+ 2
+
+
-
+
+
+ 0
+
+
+
+
-
+
+
+ QAbstractItemView::NoEditTriggers
+
+
+ true
+
+
+ true
+
+
+
+ Response Code
+
+
+
+
+ Anzahl Treffer
+
+
+
+
+ -
+
+
+
+
+
+
+ -
+
+
+ Alles exportieren
+
+
+
+
+
+
+
+ -
+
+
+
+ PPN
+
+
+
+
+ Unser Link
+
+
+
+
+ Ergebnis
+
+
+
+
+ -
+
+
+ Daten exportieren
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ PPNs laden
+
+
+
+
+ Status Code zurücksetzen
+
+
+
+
+
+
diff --git a/src/ui/threads.py b/src/ui/threads.py
new file mode 100644
index 0000000..8576273
--- /dev/null
+++ b/src/ui/threads.py
@@ -0,0 +1,495 @@
+from PyQt6 import QtCore, QtWidgets
+from PyQt6.QtCore import QThread, pyqtSignal, QTimer
+from src.database import Database
+import time
+import loguru
+import xml.etree.ElementTree as ET
+import requests
+from ratelimit import limits, sleep_and_retry
+from datetime import timedelta
+from datetime import datetime
+import asyncio
+from playwright.async_api import async_playwright
+import tqdm
+import os
+
+# Module-wide loguru logger: remove the handlers registered so far and write
+# only to "status_code.log", rotated once the file reaches 100 MB.
+log = loguru.logger
+log.remove()
+log.add("status_code.log", rotation="100 MB")
+
+
+class Qtqdm(tqdm.std.tqdm):
+    """
+    tqdm subclass that pushes its state to a callback on every refresh.
+
+    By default the bar renders into ``os.devnull``; consumers receive the
+    bar's ``format_dict`` through ``update_callback`` instead.
+
+    All parameters except ``update_callback`` are the regular tqdm
+    constructor parameters and are forwarded unchanged, with one exception:
+    ``dynamic_ncols`` is accepted for signature compatibility but always
+    forwarded as ``False``.
+
+    Args:
+        update_callback: Optional callable invoked with ``self.format_dict``
+            after each ``refresh()``.
+    """
+
+    def __init__(
+        self,
+        iterable=None,
+        desc=None,
+        total=None,
+        leave=True,
+        # Evaluated once, when the class body is executed; the handle is
+        # shared by every instance that does not pass its own ``file``.
+        file=open(os.devnull, "w"),
+        ncols=None,
+        mininterval=0.1,
+        maxinterval=10.0,
+        miniters=None,
+        ascii=None,
+        disable=False,
+        unit="it",
+        unit_scale=False,
+        dynamic_ncols=False,
+        smoothing=0.3,
+        bar_format=None,
+        initial=0,
+        position=None,
+        postfix=None,
+        unit_divisor=1000,
+        write_bytes=None,
+        lock_args=None,
+        nrows=None,
+        colour=None,
+        delay=0,
+        gui=False,
+        update_callback=None,
+        **kwargs,
+    ):
+        self._update_callback = update_callback
+        # Forward by keyword: the previous positional call skipped
+        # write_bytes/lock_args/nrows/colour/delay, which shifted ``gui`` into
+        # tqdm's ``write_bytes`` slot and dropped the other four arguments.
+        super().__init__(
+            iterable=iterable,
+            desc=desc,
+            total=total,
+            leave=leave,
+            file=file,
+            ncols=ncols,
+            mininterval=mininterval,
+            maxinterval=maxinterval,
+            miniters=miniters,
+            ascii=ascii,
+            disable=disable,
+            unit=unit,
+            unit_scale=unit_scale,
+            dynamic_ncols=False,  # intentionally fixed, see class docstring
+            smoothing=smoothing,
+            bar_format=bar_format,
+            initial=initial,
+            position=position,
+            postfix=postfix,
+            unit_divisor=unit_divisor,
+            write_bytes=write_bytes,
+            lock_args=lock_args,
+            nrows=nrows,
+            colour=colour,
+            delay=delay,
+            gui=gui,
+            **kwargs,
+        )
+
+    def refresh(self, nolock=False, lock_args=None):
+        """Refresh the bar as tqdm does, then report ``format_dict`` to the callback."""
+        super().refresh(nolock=nolock, lock_args=lock_args)
+        if self._update_callback:
+            self._update_callback(self.format_dict)
+
+
+class ETAManager(QtCore.QObject):
+    """
+    Owns a countdown worker running in its own QThread and re-emits its ETA.
+
+    Also collects per-request durations (``add_timestamp``) from which an
+    average and a remaining-time estimate for ``links`` can be derived.
+    """
+
+    etaSignal = QtCore.pyqtSignal(str)
+
+    def __init__(self, links):
+        """
+        Args:
+            links: Collection of links still to be checked; only its length
+                is used by this class.
+        """
+        super().__init__()
+        self.remaining_time = 0
+        self.links = links
+        self.running = True
+        self.times = []
+        self.eta_thread = QtCore.QThread()
+        # CountdownManagedWorker takes the total time only; the links are
+        # attached through its ``check_links`` attribute.
+        self.eta_worker = CountdownManagedWorker(self.remaining_time)
+        self.eta_worker.check_links = links
+        self.eta_worker.moveToThread(self.eta_thread)
+        self.eta_worker.etaSignal.connect(self.etaSignal.emit)
+        self.eta_thread.started.connect(self.eta_worker.run)
+
+    def start(self):
+        """Start the ETA thread."""
+        self.eta_thread.start()
+
+    def calculate_average(self):
+        """Return the mean of the recorded durations, or 0 if none exist."""
+        if not self.times:
+            return 0
+        return sum(self.times) / len(self.times)
+
+    def estimate_remaining_time(self):
+        """Return average duration multiplied by the number of links."""
+        return self.calculate_average() * len(self.links)
+
+    def add_timestamp(self, timestamp):
+        """Record one measured duration."""
+        self.times.append(timestamp)
+
+
+class CountdownManagedWorker(QtCore.QObject):
+    """Countdown that emits the remaining time once per second until stopped."""
+
+    etaSignal = QtCore.pyqtSignal(str)
+
+    def __init__(self, total_time, links=None):
+        """
+        Args:
+            total_time: Initial remaining time in seconds.
+            links: Optional links stored as ``check_links``.
+        """
+        super().__init__()
+        self.remaining_time = total_time
+        self.check_links = links
+        self.running = True
+        self.times = []
+
+    def run(self):
+        """Emit the ETA every second while ``running`` is true, then "00:00:00"."""
+        while self.running:
+            # Clamp at zero: a negative timedelta would be rendered as
+            # "-1 day, 23:59:59".
+            seconds_left = max(0, int(self.remaining_time))
+            self.etaSignal.emit(str(timedelta(seconds=seconds_left)))
+            time.sleep(1)
+            if self.remaining_time > 0:
+                self.remaining_time = max(0, self.remaining_time - 1)
+
+        self.etaSignal.emit("00:00:00")
+
+    def stop(self):
+        """Ask ``run`` to leave its loop after the current one-second tick."""
+        self.running = False
+
+
+class CountdownWorker(QtCore.QObject):
+    """ETA countdown meant to be moved into its own QThread."""
+
+    etaSignal = QtCore.pyqtSignal(str)
+
+    def __init__(self, total_time):
+        super().__init__()
+        self.remaining_time = total_time
+        self.running = True
+
+    def run(self):
+        """Tick once per second, publishing the ETA, until time is up or stopped."""
+        while self.running and self.remaining_time > 0:
+            whole_seconds = int(self.remaining_time)
+            self.etaSignal.emit(str(timedelta(seconds=whole_seconds)))
+            time.sleep(1)
+            self.remaining_time -= 1
+
+        # Final value so listeners end on a zeroed display.
+        self.etaSignal.emit("00:00:00")
+
+    def update_remaining_time(self, remaining_time):
+        """Replace the remaining time; values below zero are stored as 0."""
+        self.remaining_time = remaining_time if remaining_time > 0 else 0
+
+    def stop(self):
+        """Make ``run`` exit at its next loop check."""
+        self.running = False
+
+
+class CheckThread(QtCore.QThread):
+    """
+    Thread that re-requests all links stored with a given response code.
+
+    For every link the new status code and final URL are written back to the
+    database. Progress is published through the ``progress`` signal (tqdm
+    ``format_dict`` snapshots), individual results through ``resultSignal``.
+    """
+
+    updateSignal = QtCore.pyqtSignal()
+    total_entries_signal = QtCore.pyqtSignal(int)
+    resultSignal = QtCore.pyqtSignal(str)
+    etaSignal = QtCore.pyqtSignal(dict)
+    startSignal = QtCore.pyqtSignal()
+    progress = pyqtSignal(dict)
+
+    # URL fragments that get_status_code() refuses to request.
+    NON_SUPPORTED = ("d-nb.info", ".jpg", ".png", ".jpeg")
+
+    def __init__(self, parent=None, status_code=None):
+        super().__init__(parent)
+        self.check_code = None
+        self.status_code = status_code
+        self.sleepTimer = 0
+        self.timeout = 0
+        self.per_request_time = self.sleepTimer + self.timeout
+        self.running = True
+        self.eta_worker = None
+        self.eta_thread = None
+
+    def set_status_code(self, status_code):
+        """Select the stored response code whose links should be re-checked."""
+        self.status_code = status_code
+
+    def setTimes(self, timeout, sleepTimer):
+        """Set the request timeout and the pause between requests (seconds)."""
+        self.timeout = timeout
+        self.sleepTimer = sleepTimer
+        self.per_request_time = self.sleepTimer + self.timeout
+
+    def run(self):
+        """Check every matching link and store the result; honours ``stop()``."""
+        self.db = Database("lfer.db")
+        # try/finally: the connection used to stay open on the early return
+        # below and whenever a request/database call raised.
+        try:
+            links = self.db.get_links_by_response_code(self.status_code)
+            self.total_entries_signal.emit(len(links))
+
+            if not links:
+                self.etaSignal.emit({"text": "Done"})
+                return
+
+            progress_bar = Qtqdm(
+                links, unit_scale=True, update_callback=self._update_callback
+            )
+            self.startSignal.emit()
+            for link_id, url in progress_bar:
+                if not self.running:
+                    break
+                response_code, destination_link = self.get_status_code(url)
+                self.db.update_response_code(link_id, response_code, destination_link)
+                self.resultSignal.emit(f"{url} : {response_code}")
+                self.updateSignal.emit()
+                time.sleep(self.sleepTimer)
+        finally:
+            self.db.close()
+
+    def _update_callback(self, status):
+        """Forward a tqdm ``format_dict`` snapshot through the ``progress`` signal."""
+        self.progress.emit(status)
+
+    def stop(self):
+        """Stops the processing and ETA update."""
+        self.running = False
+
+    @log.catch()
+    @sleep_and_retry
+    def get_status_code(self, url):
+        """
+        Request ``url`` and return ``(status_code, final_url)``.
+
+        Returns:
+            ``(-2, "Site not supported")`` for URLs containing an entry of
+            ``NON_SUPPORTED``, ``(-1, "No data found")`` for URLs containing
+            "Error", ``(0, <error text>)`` when the request raised, otherwise
+            the HTTP status code and the URL after redirects.
+        """
+        if any(fragment in url for fragment in self.NON_SUPPORTED):
+            log.error(f"URL: {url}, ERROR: Site not supported")
+            return -2, "Site not supported"
+        if "Error" in url:
+            log.error(f"URL: {url}, ERROR: No data found")
+            return -1, "No data found"
+        try:
+            userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+            accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+            headers = {"User-Agent": userAgent, "Accept": accept}
+            response = requests.get(url, headers=headers, timeout=self.timeout)
+            log.info(f"URL: {url}, Status Code: {response.status_code}")
+            return response.status_code, response.url
+        except Exception as e:
+            # Deliberately broad: any failure is recorded as code 0 with the
+            # error text so one bad link cannot abort the whole run.
+            log.error(f"URL: {url}, Status Code: 0")
+            return 0, str(e)
+
+
+class WebscraperThread(QtCore.QThread):
+    """
+    Thread that reads PPNs from a text file (one per line), queries the SRU
+    endpoint for each and stores the URLs found in MARC field 856$u.
+    """
+
+    updateSignal = QtCore.pyqtSignal(int)
+    total_entries_signal = QtCore.pyqtSignal(int)
+
+    # Seconds after which a stalled SRU request is abandoned.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, parent=None, ppnfilePath=None):
+        super().__init__(parent)
+        self.ppnfilePath = ppnfilePath
+
+    def set_ppnfilePath(self, ppnfilePath):
+        """Set the path of the PPN list to import."""
+        self.ppnfilePath = ppnfilePath
+
+    def run(self):
+        """Import all PPNs of the file; emits the 1-based progress per PPN."""
+        self.db = Database("lfer.db")
+        try:
+            # utf-8-sig also accepts files saved with a BOM. Blank lines (for
+            # example the one after a trailing newline) are not PPNs and
+            # used to create bogus "Error" rows.
+            with open(self.ppnfilePath, "r", encoding="utf-8-sig") as f:
+                ppns = [line.strip() for line in f if line.strip()]
+            self.total_entries_signal.emit(len(ppns))
+            for position, ppn in enumerate(ppns, start=1):
+                links = self.process_response(self.fetch_data(ppn))
+                if links is None:
+                    self.db.add_data(ppn, "Error: No data found")
+                else:
+                    for link in links:
+                        self.db.add_data(ppn, link)
+                self.updateSignal.emit(position)
+                time.sleep(0.1)
+        finally:
+            self.db.close()
+
+    @sleep_and_retry
+    @limits(calls=10, period=1)
+    def fetch_data(self, ppn):
+        """
+        Return the SRU response body for ``ppn``.
+
+        A failed or timed-out request is logged and yields an empty string,
+        which ``process_response`` maps to "no data".
+        """
+        api_url = f"https://sru.bsz-bw.de/swb?version=1.1&query=pica.ppn%3D{ppn}&operation=searchRetrieve&maximumRecords=10&recordSchema=marcxmlk10os"
+        try:
+            response = requests.get(api_url, timeout=self.REQUEST_TIMEOUT)
+        except requests.RequestException as exc:
+            log.error(f"PPN: {ppn}, request failed: {exc}")
+            return ""
+        return response.text
+
+    def process_response(self, response):
+        """
+        Extract the URLs from datafield 856, subfield u.
+
+        Returns:
+            A non-empty list of URL strings, or ``None`` when the response is
+            not parseable XML, has no ``recordData`` or contains no URL.
+        """
+        try:
+            root = ET.fromstring(response)
+        except ET.ParseError:
+            return None
+
+        namespace = {
+            "zs": "http://www.loc.gov/zing/srw/",
+            "marc": "http://www.loc.gov/MARC21/slim",
+        }
+
+        # Only the first recordData element is evaluated.
+        record_data = root.find(".//zs:recordData", namespace)
+        if record_data is None:
+            return None
+
+        links = []
+        for datafield in record_data.findall(
+            ".//marc:datafield[@tag='856']", namespace
+        ):
+            for subfield in datafield.findall("marc:subfield[@code='u']", namespace):
+                # An empty <subfield/> has text None; that is not a link.
+                if subfield.text:
+                    links.append(subfield.text)
+
+        return links if links else None
+
+
+class StatusCodeThread(QtCore.QThread):
+    """
+    Thread that reports, per publisher, the number of links stored with the
+    selected status code.
+    """
+
+    progressSignal = QtCore.pyqtSignal(int)
+    total_entries_signal = QtCore.pyqtSignal(int)
+    current_data_signal = QtCore.pyqtSignal(tuple)
+
+    def __init__(self, parent=None, status_code=None):
+        super().__init__(parent)
+        self.status_code = status_code
+
+    def set_status_code(self, status_code):
+        """Select the status code to count."""
+        self.status_code = status_code
+
+    def run(self):
+        """Emit ``(publisher, count)`` for each publisher plus 1-based progress."""
+        self.db = Database("lfer.db")
+        # The connection was previously never closed.
+        try:
+            publishers = self.db.get_publishers()
+            self.total_entries_signal.emit(len(publishers))
+            for position, publisher in enumerate(publishers, start=1):
+                data = self.db.get_num_of_links_for_status_code_and_publisher(
+                    publisher, self.status_code
+                )
+                self.current_data_signal.emit((publisher, data[0]))
+                self.progressSignal.emit(position)
+        finally:
+            self.db.close()
+
+
+class CheckThreadPlaywright(QtCore.QThread):
+    """
+    Thread that re-checks stored links by loading them in a headless
+    Chromium instance driven by Playwright.
+
+    Results are written back to the database; an ETA string is published
+    through ``etaSignal`` by a ``CountdownWorker`` running in its own thread.
+    """
+
+    updateSignal = QtCore.pyqtSignal(int)
+    total_entries_signal = QtCore.pyqtSignal(int)
+    resultSignal = QtCore.pyqtSignal(str)
+    etaSignal = QtCore.pyqtSignal(str)
+
+    def __init__(self, parent=None, status_code=None):
+        super().__init__(parent)
+        self.check_code = None
+        self.status_code = status_code
+        self.running = True
+        self.eta_worker = None
+        self.eta_thread = None
+        self.browser = None  # Browser will be initialized asynchronously
+        # Defaults so the attributes exist even if setTimes() is never called.
+        self.timeout = 0
+        self.sleepTimer = 0
+        self.per_request_time = 0
+
+    def set_status_code(self, status_code):
+        """Select the stored response code whose links should be re-checked."""
+        self.status_code = status_code
+
+    def setTimes(self, timeout, sleepTimer):
+        """Store timeout and pause (seconds) and their sum."""
+        self.timeout = timeout
+        self.sleepTimer = sleepTimer
+        self.per_request_time = self.sleepTimer + self.timeout
+
+    async def getBrowser(self):
+        """Asynchronously launches Playwright browser"""
+        self.playwright = await async_playwright().start()
+        browser = await self.playwright.chromium.launch()
+        return browser
+
+    async def get_page_status(self, browser, url):
+        """
+        Load ``url`` in a new page and return ``(status_code, final_url)``.
+
+        Returns ``(-3, url)`` when navigation yields no response (e.g. a PDF
+        download) or when loading raises.
+        """
+        page = await browser.new_page()
+        try:
+            response = await page.goto(url, wait_until="domcontentloaded")
+            # If response is None, the request was aborted (e.g., a PDF opened)
+            if response is None:
+                print(f"Navigation aborted (likely due to PDF): {url}")
+                return -3, url
+            return response.status, response.url
+        except Exception as e:
+            print(f"Error loading {url}: {e}")
+            return -3, url  # Custom error code
+        finally:
+            # Single close on every path; the aborted-navigation path used
+            # to close the page twice.
+            await page.close()
+
+    def run(self):
+        """Starts Playwright in an event loop"""
+        asyncio.run(self.run_async())
+
+    async def estimate_remaining_time(self, links, browser=None):
+        """
+        Time one request for the first link and extrapolate to all links.
+
+        Args:
+            links: Sequence of ``(id, url)`` rows; must not be empty.
+            browser: Optional running browser to reuse. Without it a
+                temporary Playwright instance is started and torn down.
+
+        Returns:
+            Estimated total duration in seconds (float).
+        """
+        own_playwright = None
+        if browser is None:
+            own_playwright = await async_playwright().start()
+            browser = await own_playwright.chromium.launch()
+        try:
+            started = datetime.now()
+            await self.get_page_status(browser, links[0][1])
+            # total_seconds(): ``.seconds`` truncated sub-second requests to 0.
+            per_request = (datetime.now() - started).total_seconds()
+        finally:
+            if own_playwright is not None:
+                await browser.close()
+                await own_playwright.stop()
+        return len(links) * per_request
+
+    async def run_async(self):
+        """Async version of the run method"""
+        self.db = Database("lfer.db")
+        # Nested try/finally blocks release database, browser and Playwright
+        # on every exit path, including the early return and exceptions.
+        try:
+            links = self.db.get_links_by_response_code(self.status_code)
+            self.total_entries_signal.emit(len(links))
+
+            if not links:
+                self.etaSignal.emit("Done")
+                return
+
+            playwright = await async_playwright().start()
+            try:
+                browser = await playwright.chromium.launch()
+                try:
+                    await self._check_links(browser, links)
+                finally:
+                    await browser.close()
+            finally:
+                await playwright.stop()  # Ensure Playwright stops properly
+        finally:
+            self.db.close()
+
+    async def _check_links(self, browser, links):
+        """Check every link with ``browser`` while keeping the ETA updated."""
+        remaining_time = await self.estimate_remaining_time(links, browser)
+        self._start_eta(remaining_time)
+        try:
+            for position, (link_id, url) in enumerate(links, start=1):
+                if not self.running:
+                    break
+                started = datetime.now()
+                response_code, destination_link = await self.get_page_status(
+                    browser, url
+                )
+                self.db.update_response_code(link_id, response_code, destination_link)
+                self.updateSignal.emit(position)
+                self.resultSignal.emit(f"{url} : {response_code}")
+                # Adjust the ETA by the time this request actually took.
+                remaining_time -= (datetime.now() - started).total_seconds()
+                self.eta_worker.update_remaining_time(remaining_time)
+        finally:
+            self._stop_eta()
+
+    def _start_eta(self, remaining_time):
+        """Start the countdown worker in its own thread."""
+        self.eta_thread = QtCore.QThread()
+        self.eta_worker = CountdownWorker(remaining_time)
+        self.eta_worker.moveToThread(self.eta_thread)
+        self.eta_worker.etaSignal.connect(
+            self.etaSignal.emit, QtCore.Qt.ConnectionType.DirectConnection
+        )
+        self.eta_thread.started.connect(self.eta_worker.run)
+        self.eta_thread.start()
+
+    def _stop_eta(self):
+        """Stop the countdown worker and wait for its thread to finish."""
+        # The worker loop must be told to stop; quit() alone only ends the
+        # thread's event loop, so wait() used to block until the countdown
+        # itself reached zero.
+        self.eta_worker.stop()
+        self.eta_thread.quit()
+        self.eta_thread.wait()
+
+    def stop(self):
+        """Stops the processing and ETA update."""
+        self.running = False
diff --git a/src/ui/utils.py b/src/ui/utils.py
new file mode 100644
index 0000000..80331d9
--- /dev/null
+++ b/src/ui/utils.py
@@ -0,0 +1,44 @@
+from PyQt6.QtWidgets import QProgressBar
+from PyQt6.QtCore import pyqtSlot
+from PyQt6 import QtCore
+import datetime
+
+
+class QtqdmProgressBar(QProgressBar):
+    """QProgressBar that displays tqdm ``format_dict`` snapshots."""
+
+    def __init__(self, parent):
+        super().__init__(parent)
+        self.setMinimumWidth(300)  # Set minimum width
+        self.setMinimumHeight(20)  # Set minimum height
+        self.setTextVisible(True)  # Enable text display
+        self.setFormat("%p%")  # Set format to show percentage
+        self.setAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)
+
+    @pyqtSlot(dict)
+    def make_progress(self, status: dict):
+        """
+        Update range, value and label from a tqdm status dictionary.
+
+        Keys read: ``initial``, ``total``, ``n``, ``unit``, ``rate``,
+        ``elapsed`` and ``prefix``. Missing or ``None`` values fall back to
+        0 / an empty string (``rate`` is ``None`` at the start of an
+        iteration).
+        """
+        initial = status.get("initial") or 0
+        total = status.get("total") or 0
+        iteration = status.get("n") or 0
+        unit = status.get("unit", "it")
+        rate = status.get("rate") or 0.0
+        time_elapsed = status.get("elapsed") or 0.0
+        prefix = status.get("prefix") or ""
+
+        seconds_left = (total - iteration) / rate if rate > 0 else 0
+        # Whole seconds only: a float timedelta is rendered with microseconds
+        # ("0:00:12.345678").
+        formated_time_remaining = str(datetime.timedelta(seconds=int(seconds_left)))
+
+        self.setMinimum(initial)
+        self.setMaximum(total)
+        self.setValue(iteration)
+        self.setFormat(
+            f"{prefix} %v of %m (%p%) at {rate:0.4f} {unit}/sec after {time_elapsed:0.2f} sec > {formated_time_remaining}"
+        )
diff --git a/status_code.py b/status_code.py
new file mode 100644
index 0000000..ac437d9
--- /dev/null
+++ b/status_code.py
@@ -0,0 +1,104 @@
+import requests
+from src.database import Database
+import threading
+from ratelimit import limits, sleep_and_retry
+import time
+
+import loguru
+
+# Module-wide loguru logger: remove the handlers registered so far and write
+# only to "status_code.log", rotated once the file reaches 100 MB.
+log = loguru.logger
+log.remove()
+log.add("status_code.log", rotation="100 MB")
+
+
+# Number of worker threads used by main_threaded().
+THREADS = 10
+# Thread objects created by main_threaded().
+threadlist = []
+# NOTE: the database is opened and the links are loaded at import time.
+db = Database("lfer.db")
+
+links = db.get_links()
+LINKLEN = len(links)
+# Number of links processed so far by worker().
+LINKPROGRESS = 0
+# (id, response_code, destination_link) tuples collected by worker().
+RESPONSES = []
+# URL fragments that get_status_code() refuses to request.
+non_support = ["d-nb.info", ".jpg", ".png", ".jpeg"]
+
+
+@log.catch()
+def get_status_code(url):
+    """
+    Request ``url`` and return ``(status_code, final_url)``.
+
+    Special results: ``(-2, "Site not supported")`` for URLs containing an
+    entry of ``non_support``, ``(-1, "No data found")`` for URLs containing
+    "Error", and ``(0, <error text>)`` when the request raised.
+    """
+    for fragment in non_support:
+        if fragment in url:
+            log.error(f"URL: {url}, ERROR: Site not supported")
+            return -2, "Site not supported"
+    if "Error" in url:
+        log.error(f"URL: {url}, ERROR: No data found")
+        return -1, "No data found"
+
+    request_headers = {
+        "User-Agent": "Automated LFER Status Code Checker/1.0 (alexander.kirchner@ph-freiburg.de)",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    }
+    try:
+        response = requests.get(url, headers=request_headers, timeout=50)
+        log.info(f"URL: {url}, Status Code: {response.status_code}")
+        return response.status_code, response.url
+    except Exception as e:
+        # Any failure is reported as code 0 together with the error text.
+        log.error(f"URL: {url}, Status Code: 0")
+        return 0, str(e)
+
+
+# Guards LINKPROGRESS: "+=" on a global is a read-modify-write that can lose
+# updates when several worker threads execute it concurrently.
+_PROGRESS_LOCK = threading.Lock()
+
+
+def worker(listpart):
+    """
+    Check every ``(id, url)`` pair of ``listpart``.
+
+    Results are appended to ``RESPONSES`` as
+    ``(id, response_code, destination_link)``; ``LINKPROGRESS`` is advanced
+    and printed after each link.
+    """
+    global LINKPROGRESS
+    for link_id, url in listpart:
+        response_code, destination_link = get_status_code(url)
+        RESPONSES.append((link_id, response_code, destination_link))
+        with _PROGRESS_LOCK:
+            LINKPROGRESS += 1
+            done = LINKPROGRESS
+        print("Progress: ", done, "/", LINKLEN, end="\r")
+
+
+def main_threaded():
+    """
+    Check all links with ``THREADS`` worker threads, then store the results.
+
+    The database is written only after every thread has finished, from the
+    calling thread.
+    """
+    # Strided slices give every thread an (almost) equal share; the former
+    # contiguous split handed the whole remainder to the last thread.
+    workers = [
+        threading.Thread(target=worker, args=(links[offset::THREADS],))
+        for offset in range(THREADS)
+    ]
+    # Keep the module-level record, but start/join only the new threads:
+    # re-starting threads left over from an earlier call raises RuntimeError.
+    threadlist.extend(workers)
+    for thread in workers:
+        thread.start()
+    for thread in workers:
+        thread.join()
+    for link_id, response_code, destination_link in RESPONSES:
+        db.update_response_code(link_id, response_code, destination_link)
+    print("Done")
+
+
+def main():
+    """Check all loaded links one after another, pausing 1 s between requests."""
+    for position, (link_id, url) in enumerate(links, start=1):
+        status, final_url = get_status_code(url)
+        db.update_response_code(link_id, status, final_url)
+        print("Progress: ", position, "/", LINKLEN, end="\r")
+        time.sleep(1)
+    print("Done")
+
+
+def check_by_status_code(status_code):
+    """
+    Re-check every link currently stored with ``status_code``.
+
+    The fresh result is always written back. Previously it was stored only
+    when it equalled the old code, so any link whose status had changed kept
+    its stale entry.
+    """
+    links_to_check = db.get_links_by_response_code(status_code)
+    # Progress is relative to the links selected here, not to LINKLEN.
+    total = len(links_to_check)
+    for position, (link_id, url) in enumerate(links_to_check, start=1):
+        response_code, destination_link = get_status_code(url)
+        db.update_response_code(link_id, response_code, destination_link)
+        print("Progress: ", position, "/", total, end="\r")
+        time.sleep(1)
+    print("Done")
+
+
+if __name__ == "__main__":
+ main() # checks all with code 0
+ # check_by_status_code(429) # checks titles with timeout
diff --git a/webscraper.py b/webscraper.py
new file mode 100644
index 0000000..027354e
--- /dev/null
+++ b/webscraper.py
@@ -0,0 +1,63 @@
+import requests
+from ratelimit import limits, sleep_and_retry
+from src.database import Database
+import xml.etree.ElementTree as ET
+
+
+db = Database("lfer.db")
+
+
+@sleep_and_retry
+@limits(calls=10, period=1)
+def fetch_data(ppn, timeout=30):
+    """
+    Return the SRU response body for ``ppn``.
+
+    Args:
+        ppn: Record identifier inserted into the ``pica.ppn`` query.
+        timeout: Seconds after which a stalled request is abandoned; without
+            a timeout ``requests`` can block indefinitely.
+
+    Returns:
+        The response text, or an empty string when the request failed (which
+        ``process_response`` treats as "no data").
+    """
+    api_url = f"https://sru.bsz-bw.de/swb?version=1.1&query=pica.ppn%3D{ppn}&operation=searchRetrieve&maximumRecords=10&recordSchema=marcxmlk10os"
+    try:
+        response = requests.get(api_url, timeout=timeout)
+    except requests.RequestException as exc:
+        print(f"Request for PPN {ppn} failed: {exc}")
+        return ""
+    return response.text
+
+
+def process_response(response):
+    """
+    Extract the URLs from datafield 856, subfield u.
+
+    Returns:
+        A non-empty list of URL strings, or ``None`` when the response is not
+        parseable XML, has no ``recordData`` or contains no URL.
+    """
+    try:
+        root = ET.fromstring(response)
+    except ET.ParseError:
+        return None
+
+    namespace = {
+        "zs": "http://www.loc.gov/zing/srw/",
+        "marc": "http://www.loc.gov/MARC21/slim",
+    }
+
+    # Only the first recordData element is evaluated.
+    record_data = root.find(".//zs:recordData", namespace)
+    if record_data is None:
+        return None
+
+    links = []
+    for datafield in record_data.findall(".//marc:datafield[@tag='856']", namespace):
+        for subfield in datafield.findall("marc:subfield[@code='u']", namespace):
+            # An empty <subfield/> has text None; that is not a link.
+            if subfield.text:
+                links.append(subfield.text)
+
+    return links if links else None
+
+
+def get_data():
+    """
+    Read PPNs from "ppnlist.txt" (one per line), fetch each record and store
+    its links; PPNs without links are stored with "Error: No data found".
+    """
+    with open("ppnlist.txt", "r") as f:
+        # Blank lines (e.g. after a trailing newline) are not PPNs.
+        ppns = [line.strip() for line in f.read().split("\n") if line.strip()]
+    total = len(ppns)
+    # enumerate instead of ppns.index(ppn): index() rescans the list for
+    # every PPN and reports the first position for duplicates.
+    for position, ppn in enumerate(ppns, start=1):
+        links = process_response(fetch_data(ppn))
+        if links is None:
+            db.add_data(ppn, "Error: No data found")
+        else:
+            for link in links:
+                db.add_data(ppn, link)
+        print("Progress: ", position, "/", total, end="\r")
+
+
+if __name__ == "__main__":
+ print("Hello from webscraper!\nScraping the list of PPNs...")
+ get_data()
+ print("Done")