From ee62c65ae7ce64759d8eb83fe5a211b03eea3d3e Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Wed, 29 Oct 2025 09:31:40 +0100 Subject: [PATCH] chore: restructured project, updated readme --- MIGRATION_GUIDE.md | 216 ++ README.md | 451 +++- pyproject.toml | 1 + src/admin/__init__.py | 5 + src/admin/commands.py | 104 + src/background/__init__.py | 16 + src/background/autoadder.py | 59 + src/background/availability_checker.py | 83 + src/background/book_grabber.py | 199 ++ src/background/documentation_server.py | 23 + src/background/new_editions.py | 345 +++ src/core/__init__.py | 30 + src/core/constants.py | 213 ++ src/core/models.py | 410 ++++ src/core/semester.py | 248 ++ src/database/__init__.py | 5 + src/database/connection.py | 2008 +++++++++++++++++ .../migrations/V001__create_base_tables.sql | 132 ++ .../V002__create_table_webadis_login.sql | 10 + .../V003_update_webadis_add_user_area.sql | 6 + src/database/schemas.py | 112 + src/documents/__init__.py | 2 + src/documents/generators.py | 371 +++ src/parsers/__init__.py | 13 + src/parsers/csv_parser.py | 23 + src/parsers/pdf_parser.py | 23 + src/parsers/transformers/__init__.py | 8 + src/parsers/transformers/schemas.py | 122 + src/parsers/transformers/transformers.py | 522 +++++ src/parsers/word_parser.py | 373 +++ src/parsers/xml_parser.py | 67 + src/services/__init__.py | 16 + src/services/catalogue.py | 292 +++ src/services/lehmanns.py | 312 +++ src/services/openai.py | 58 + src/services/sru.py | 631 ++++++ src/services/webadis.py | 35 + src/services/webrequest.py | 314 +++ src/services/zotero.py | 340 +++ src/shared/__init__.py | 6 + src/shared/config.py | 66 + src/ui/dialogs/Ui_edit_bookdata.py | 2 +- src/ui/dialogs/Ui_fileparser.py | 2 +- src/ui/dialogs/Ui_login.py | 4 +- src/ui/dialogs/bookdata.py | 2 +- src/ui/dialogs/deletedialog.py | 2 +- src/ui/dialogs/docuprint.py | 4 +- src/ui/dialogs/elsa_add_entry.py | 4 +- src/ui/dialogs/fileparser.py | 2 +- src/ui/dialogs/login.py | 4 +- src/ui/dialogs/newEdition.py | 4 +- src/ui/dialogs/parsed_titles.py | 2 +- src/ui/dialogs/progress.py | 6 +- src/ui/userInterface.py | 15 +- src/ui/widgets/MessageCalendar.py | 2 +- src/ui/widgets/admin_create_user.py | 3 +- src/ui/widgets/admin_edit_prof.py | 4 +- src/ui/widgets/admin_edit_user.py | 3 +- src/ui/widgets/admin_query.py | 2 +- src/ui/widgets/calendar_entry.py | 2 +- src/ui/widgets/elsa_main.py | 6 +- src/ui/widgets/graph.py | 4 +- src/ui/widgets/new_edition_check.py | 4 +- src/ui/widgets/searchPage.py | 7 +- src/ui/widgets/signature_update.py | 8 +- src/ui/widgets/welcome_wizard.py | 6 +- src/utils/files.py | 100 + test.py | 44 +- tests/test_migrations_runner.py | 2 +- uv.lock | 98 + 70 files changed, 8518 insertions(+), 100 deletions(-) create mode 100644 MIGRATION_GUIDE.md create mode 100644 src/admin/__init__.py create mode 100644 src/admin/commands.py create mode 100644 src/background/__init__.py create mode 100644 src/background/autoadder.py create mode 100644 src/background/availability_checker.py create mode 100644 src/background/book_grabber.py create mode 100644 src/background/documentation_server.py create mode 100644 src/background/new_editions.py create mode 100644 src/core/__init__.py create mode 100644 src/core/constants.py create mode 100644 src/core/models.py create mode 100644 src/core/semester.py create mode 100644 src/database/__init__.py create mode 100644 src/database/connection.py create mode 100644 src/database/migrations/V001__create_base_tables.sql create mode 100644 
src/database/migrations/V002__create_table_webadis_login.sql create mode 100644 src/database/migrations/V003_update_webadis_add_user_area.sql create mode 100644 src/database/schemas.py create mode 100644 src/documents/__init__.py create mode 100644 src/documents/generators.py create mode 100644 src/parsers/__init__.py create mode 100644 src/parsers/csv_parser.py create mode 100644 src/parsers/pdf_parser.py create mode 100644 src/parsers/transformers/__init__.py create mode 100644 src/parsers/transformers/schemas.py create mode 100644 src/parsers/transformers/transformers.py create mode 100644 src/parsers/word_parser.py create mode 100644 src/parsers/xml_parser.py create mode 100644 src/services/__init__.py create mode 100644 src/services/catalogue.py create mode 100644 src/services/lehmanns.py create mode 100644 src/services/openai.py create mode 100644 src/services/sru.py create mode 100644 src/services/webadis.py create mode 100644 src/services/webrequest.py create mode 100644 src/services/zotero.py create mode 100644 src/shared/__init__.py create mode 100644 src/shared/config.py create mode 100644 src/utils/files.py diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md new file mode 100644 index 0000000..565239c --- /dev/null +++ b/MIGRATION_GUIDE.md @@ -0,0 +1,216 @@ +# Migration Guide: New File Structure + +## Overview + +The codebase has been reorganized to improve clarity, maintainability, and separation of concerns. This guide shows how to update your imports. + +## New Structure Summary + +``` +src/ +├── core/ # Domain models & constants (formerly in logic/) +├── database/ # Data persistence (formerly in backend/) +├── services/ # External APIs (from backend/ and logic/) +├── parsers/ # File parsing (formerly in logic/) +├── documents/ # Document generation (formerly in utils/) +├── background/ # Threading tasks (formerly in backend/) +├── admin/ # Admin commands (formerly in backend/) +├── shared/ # Cross-cutting concerns (logging, config) +├── utils/ # Pure utilities +├── ui/ # UI components (unchanged) +└── errors/ # Custom exceptions (unchanged) +``` + +## Import Changes + +### Core Domain Models + +**OLD:** +```python +from src.logic import BookData, Prof, Semester, Apparat +from src.logic.dataclass import BookData, Prof +from src.logic.semester import Semester +from src.logic.constants import APP_NRS, SEMAP_MEDIA_ACCOUNTS +``` + +**NEW:** +```python +from src.core.models import BookData, Prof, Semester, Apparat, ApparatData +from src.core import BookData, Prof, Semester # Can use shorthand +from src.core.semester import Semester +from src.core.constants import APP_NRS, SEMAP_MEDIA_ACCOUNTS +``` + +### Database + +**OLD:** +```python +from src.backend import Database +from src.backend.database import Database +from src.backend.db import CREATE_TABLE_MEDIA +``` + +**NEW:** +```python +from src.database import Database +from src.database.connection import Database # If you need specific module +from src.database.schemas import CREATE_TABLE_MEDIA +``` + +### External Services & APIs + +**OLD:** +```python +from src.backend.catalogue import Catalogue +from src.backend.webadis import get_book_medianr +from src.logic.SRU import SWB +from src.logic.lehmannsapi import LehmannsClient +from src.logic.zotero import ZoteroController +from src.logic.webrequest import BibTextTransformer, WebRequest +``` + +**NEW:** +```python +from src.services import Catalogue, SWB, LehmannsClient, ZoteroController +from src.services.catalogue import Catalogue +from src.services.webadis import 
get_book_medianr +from src.services.sru import SWB +from src.services.lehmanns import LehmannsClient +from src.services.zotero import ZoteroController +from src.services.webrequest import BibTextTransformer, WebRequest +``` + +### Parsers + +**OLD:** +```python +from src.logic import csv_to_list, word_to_semap +from src.logic.csvparser import csv_to_list +from src.logic.wordparser import word_to_semap +from src.logic.pdfparser import pdf_to_text +from src.logic.xmlparser import xml_to_dict +``` + +**NEW:** +```python +from src.parsers import csv_to_list, word_to_semap # Lazy loading +from src.parsers.csv_parser import csv_to_list +from src.parsers.word_parser import word_to_semap +from src.parsers.pdf_parser import pdf_to_text +from src.parsers.xml_parser import xml_to_dict +``` + +### Document Generation + +**OLD:** +```python +from src.utils.richtext import create_document, create_pdf +``` + +**NEW:** +```python +from src.documents import create_document, create_pdf +from src.documents.generators import create_document, create_pdf +``` + +### Background Tasks + +**OLD:** +```python +from src.backend import AutoAdder, AvailChecker, BookGrabber +from src.backend.threads_autoadder import AutoAdder +from src.backend.threads_availchecker import AvailChecker +from src.backend.thread_bookgrabber import BookGrabber +from src.backend.thread_neweditions import NewEditionCheckerThread +``` + +**NEW:** +```python +from src.background import AutoAdder, AvailChecker, BookGrabber, NewEditionCheckerThread +from src.background.autoadder import AutoAdder +from src.background.availability_checker import AvailChecker +from src.background.book_grabber import BookGrabber +from src.background.new_editions import NewEditionCheckerThread +``` + +### Admin Commands + +**OLD:** +```python +from src.backend import AdminCommands +from src.backend.admin_console import AdminCommands +``` + +**NEW:** +```python +from src.admin import AdminCommands +from src.admin.commands import AdminCommands +``` + +### Configuration & Logging + +**OLD:** +```python +from src.backend.settings import Settings +from src.logic.settings import Settings +from src.shared.logging import log # This stays the same +``` + +**NEW:** +```python +from src.shared import Settings, load_config, log +from src.shared.config import Settings, load_config +from src.shared.logging import log +``` + +## File Renames + +| Old Path | New Path | +|----------|----------| +| `logic/dataclass.py` | `core/models.py` | +| `logic/SRU.py` | `services/sru.py` | +| `logic/lehmannsapi.py` | `services/lehmanns.py` | +| `backend/database.py` | `database/connection.py` | +| `backend/db.py` | `database/schemas.py` | +| `backend/threads_autoadder.py` | `background/autoadder.py` | +| `backend/threads_availchecker.py` | `background/availability_checker.py` | +| `backend/thread_bookgrabber.py` | `background/book_grabber.py` | +| `backend/thread_neweditions.py` | `background/new_editions.py` | +| `backend/admin_console.py` | `admin/commands.py` | +| `utils/richtext.py` | `documents/generators.py` | +| `logic/csvparser.py` | `parsers/csv_parser.py` | +| `logic/pdfparser.py` | `parsers/pdf_parser.py` | +| `logic/wordparser.py` | `parsers/word_parser.py` | +| `logic/xmlparser.py` | `parsers/xml_parser.py` | + +## Quick Migration Checklist + +1. ✅ Update all `from src.backend import Database` → `from src.database import Database` +2. ✅ Update all `from src.logic import BookData` → `from src.core.models import BookData` +3. 
✅ Update all `from src.backend.catalogue` → `from src.services.catalogue`
+4. ✅ Update all `from src.logic.SRU` → `from src.services.sru`
+5. ✅ Update all `from src.backend.admin_console` → `from src.admin`
+6. ✅ Update threading imports from `src.backend.thread*` → `src.background.*`
+
+## Benefits
+
+- **Clearer architecture**: Each folder has a specific, well-defined purpose
+- **Better dependency flow**: core → database/services → background → ui
+- **Reduced duplication**: Merged the duplicated modules (`pickles.py` plus the `backend/` and `logic/` copies of `settings.py`)
+- **Easier navigation**: Intuitive folder names ("services" vs "logic")
+- **Scalability**: Clear extension points for new features
+
+## Backwards Compatibility
+
+The old `backend/` and `logic/` folders still exist with the original files. They will be removed in a future cleanup phase after thorough testing.
+
+## Questions?
+
+If you encounter import errors:
+1. Check this guide for the new import path
+2. Search for the class/function name in the new structure
+3. Most moves follow the pattern: external APIs → `services/`, data models → `core/`, threads → `background/`
+
+## Status
+
+✅ **Migration Complete** - Application successfully starts and runs with the new structure!
diff --git a/README.md b/README.md
index fc6e6f7..fca23a1 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,438 @@
 # SemesterapparatsManager
-SemesterapparatsManager is a graphical tool for managing semester apparatuses in the University of Education Freiburg. It allows the users to manage the semester apparatuses in a user-friendly way. It's functions include management of physical and digital semester apparatuses, as well as creating the citations for the digital files of the digital semester apparatuses. For that it uses Zotero, an open source reference management software. The semester apparatuses are stored in a SQLite database, which is created and managed by the SemesterapparatsManager. The SemesterapparatsManager is written in Python and uses the PyQt6 library for the graphical user interface
+[![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![PySide6](https://img.shields.io/badge/PySide6-Qt6-green.svg)](https://doc.qt.io/qtforpython/)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+
+A comprehensive desktop application for managing semester course reserve collections (Semesterapparate) at the University of Education Freiburg. This tool streamlines the workflow of creating, managing, and maintaining both physical and digital course reserves, with integrated citation management powered by Zotero.
-## Features -- Manage physical semester apparatuses - - Add semester apparatuses - - Edit semester apparatuses - - Delete semester apparatuses - - Extend semester apparatuses - - Notify professors about semester apparatuses creation or deletion - - Add messages to all semester apparatuses, or an individual semester apparatus -- Manage digital semester apparatuses - - Use text parsing to extract information from the submitted form and create the scans - - if a book is used multiple parts of a book are used, it can be split into the parts - - Create the matching citations for the files -- Statistics and Search - - Search semester apparatuses by various criteria - - Show statistics about the semester apparatuses creation and deletion -- Edit user data +## 📋 Table of Contents +- [Overview](#overview) +- [Features](#features) +- [Architecture](#architecture) +- [Installation](#installation) +- [Usage](#usage) +- [Development](#development) +- [Documentation](#documentation) +- [Contributing](#contributing) +- [License](#license) -## Images +## 🎯 Overview -![Main Window](docs/images/mainUI.png) -![Statistics](docs/images/statistics.png) \ No newline at end of file +SemesterapparatsManager is a Python-based graphical application designed to simplify the complex workflow of academic course reserve management. It provides librarians and staff with tools to: + +- **Manage Physical Reserves**: Track books, media, and materials reserved for courses +- **Handle Digital Collections**: Process, scan, and catalog digital course materials +- **Automate Citations**: Generate proper bibliographic citations using Zotero integration +- **Communicate**: Send automated emails to professors about reserve status +- **Analyze**: View statistics and search through historical data +- **Integrate**: Connect with library catalogs (SWB, DNB) and vendor APIs (Lehmanns) + +### Key Technologies + +- **Framework**: PySide6 (Qt6) for cross-platform GUI +- **Database**: SQLite with migration support +- **APIs**: Integration with SWB, DNB, Zotero, OpenAI, and catalog services +- **Document Processing**: Word, PDF, CSV, and XML parsing +- **Bibliography**: Zotero-based citation management + +## ✨ Features + +### Course Reserve Management + +- **Create & Edit**: Add new semester apparatus with book and media entries +- **Extend Duration**: Extend existing reserves for additional semesters +- **Smart Search**: Find reserves by semester, professor, subject, or signature +- **Availability Checking**: Automated checks against library catalog +- **New Edition Detection**: Background thread to find newer editions of books + +### Digital Collection Features + +- **Document Parsing**: Extract information from submitted Word/PDF forms +- **Smart Splitting**: Automatically split multi-part book sections +- **Citation Generation**: Create proper citations for all digital files +- **ELSA Integration**: Manage electronic semester apparatus (ELSA) workflows +- **File Management**: Track and recreate files from database + +### Communication & Notifications + +- **Email Templates**: Pre-configured templates for common scenarios +- **Professor Notifications**: Automated emails for creation, extension, or dissolution +- **Message System**: Attach messages to specific reserves or broadcast to all + +### Data & Analytics + +- **Statistics Dashboard**: Visualize creation and deletion trends +- **Advanced Search**: Multi-criteria search across all reserves +- **Export**: Generate reports and documentation +- **Calendar View**: Timeline of reserve 
activities + +### Administration + +- **User Management**: Create, edit, and delete system users +- **Professor Database**: Maintain professor contact information +- **Settings Configuration**: Customize database paths, temp directories, API keys +- **Backup & Migration**: Database migration support for schema updates + +## 🏗️ Architecture + +### Project Structure + +``` +SemesterapparatsManager/ +├── src/ +│ ├── core/ # Domain models & constants +│ │ ├── models.py # BookData, Prof, Apparat, Semester, etc. +│ │ ├── constants.py # Application constants +│ │ └── semester.py # Semester handling logic +│ ├── database/ # Data persistence layer +│ │ ├── connection.py # Database class & operations +│ │ ├── schemas.py # SQL schema definitions +│ │ └── migrations/ # SQL migration files +│ ├── services/ # External API integrations +│ │ ├── catalogue.py # RDS catalog scraping +│ │ ├── sru.py # SWB/DNB library API client +│ │ ├── lehmanns.py # Lehmanns bookstore API +│ │ ├── zotero.py # Zotero integration +│ │ ├── webadis.py # WebADIS automation +│ │ └── openai.py # OpenAI API integration +│ ├── parsers/ # Document & file parsing +│ │ ├── csv_parser.py # CSV parsing +│ │ ├── word_parser.py # Word document parsing +│ │ ├── pdf_parser.py # PDF text extraction +│ │ ├── xml_parser.py # XML parsing +│ │ └── transformers/ # Bibliography format conversion +│ ├── documents/ # Document generation +│ │ └── generators.py # Word/PDF document creation +│ ├── background/ # Background tasks & threading +│ │ ├── autoadder.py # Automatic book addition +│ │ ├── availability_checker.py # Catalog availability +│ │ ├── book_grabber.py # Catalog metadata retrieval +│ │ └── new_editions.py # New edition detection +│ ├── ui/ # User interface components +│ │ ├── userInterface.py # Main application window +│ │ ├── dialogs/ # Dialog windows +│ │ └── widgets/ # Reusable UI widgets +│ ├── admin/ # Administrative functions +│ │ └── commands.py # Admin CLI commands +│ ├── utils/ # Utility functions +│ │ ├── files.py # File operations +│ │ ├── sorting.py # Custom sorting logic +│ │ └── blob.py # Binary data handling +│ ├── shared/ # Cross-cutting concerns +│ │ ├── logging.py # Centralized logging +│ │ └── config.py # Configuration management +│ └── errors/ # Custom exceptions +│ └── database.py # Database-specific errors +├── tests/ # Test suite +├── docs/ # Documentation +├── mail_vorlagen/ # Email templates +├── config.yaml # Application configuration +├── main.py # Application entry point +└── README.md +``` + +### Architecture Principles + +**Layered Architecture**: +``` +UI Layer (PySide6 Qt Widgets) + ↓ +Background Tasks (QThread workers) + ↓ +Business Logic (Core models & operations) + ↓ +Services Layer (External API integrations) + ↓ +Data Access Layer (Database & file operations) +``` + +**Key Design Patterns**: +- **Repository Pattern**: Database class abstracts data persistence +- **Service Layer**: External integrations isolated in `services/` +- **Observer Pattern**: Qt signals/slots for event-driven updates +- **Factory Pattern**: Document and citation generators +- **Strategy Pattern**: Multiple parsing strategies for different file formats + +## 🚀 Installation + +### Prerequisites + +- Python 3.10 or higher +- [uv](https://github.com/astral-sh/uv) - Fast Python package installer and resolver (recommended) + ```bash + # Install uv (Windows PowerShell) + powershell -c "irm https://astral.sh/uv/install.ps1 | iex" + + # Or using pip + pip install uv + ``` + +### Setup Steps (Using uv - Recommended) + +1. 
**Clone the repository**: + ```bash + git clone https://github.com/yourusername/SemesterapparatsManager.git + cd SemesterapparatsManager + ``` + +2. **Create virtual environment and install dependencies**: + ```bash + # uv automatically creates venv and installs dependencies + uv sync + ``` + +3. **Configure application**: + - First launch will present a setup wizard + - Configure database path, temp directory, and API keys + - Create admin user account + +4. **Run the application**: + ```bash + uv run python main.py + ``` + +### Alternative Setup (Using pip/venv) + +
+<details>
+<summary>Click to expand traditional pip installation steps</summary>
+
+1. **Create virtual environment**:
+   ```bash
+   python -m venv .venv
+   ```
+
+2. **Activate virtual environment**:
+   - Windows (PowerShell):
+     ```powershell
+     .venv\Scripts\Activate.ps1
+     ```
+   - Linux/Mac:
+     ```bash
+     source .venv/bin/activate
+     ```
+
+3. **Install dependencies**:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+4. **Run the application**:
+   ```bash
+   python main.py
+   ```
+
+</details>
+
+### Building Executable
+
+To build a standalone executable:
+
+```bash
+# Using uv
+uv run pyinstaller --noconfirm --onedir --windowed \
+  --icon='icons/app.ico' \
+  --name='SemesterapparatsManager' \
+  --clean \
+  --add-data='config.yaml;.' \
+  --add-data='icons;icons' \
+  main.py
+```
+
+Or use the provided build task (see `pyproject.toml`).
+
+## 📖 Usage
+
+### First Time Setup
+
+1. **Launch Application**: Run `python main.py`
+2. **Setup Wizard**: Configure basic settings
+   - Database location
+   - Temporary files directory
+   - Library catalog credentials (optional)
+   - API keys (Zotero, OpenAI - optional)
+3. **Create Admin User**: Set up your admin credentials
+4. **Login**: Use your credentials to access the main interface
+
+### Creating a Semester Apparatus
+
+1. **Navigate**: Main window → "Neuer Apparat" (New Apparatus)
+2. **Fill Details**:
+   - Semester (e.g., WiSe 2024/25)
+   - Professor information
+   - Course subject
+   - Apparatus number
+3. **Add Books**: Click "Buch hinzufügen" (Add Book)
+   - Enter signature or search by title
+   - System fetches metadata from catalog
+   - Add multiple books as needed
+4. **Add Media**: Click "Medium hinzufügen" (Add Media)
+   - DVDs, CDs, or other media types
+5. **Save**: Confirm and create the apparatus
+6. **Generate Email**: Optionally send notification to professor
+
+### Managing Digital Collections (ELSA)
+
+1. **Upload Form**: Submit Word/PDF form with book chapter information
+
+### Administrative Tasks
+
+- **User Management**: Admin → Users → Create/Edit/Delete
+- **Professor Database**: Admin → Professors → Manage contacts
+- **System Settings**: Edit → Settings → Configure paths and APIs
+- **Database Maintenance**: Admin → Database → Run migrations
+
+## 🛠️ Development
+
+### Setting Up Development Environment
+
+1. **Install all dependencies** (including dev dependencies):
+   ```bash
+   # Using uv (recommended)
+   uv sync --all-extras
+
+   # Or using pip
+   pip install -r requirements-dev.txt
+   ```
+
+2. **Enable logging**:
+   ```python
+   from src.shared.logging import configure
+   configure("DEBUG")  # In main.py
+   ```
+
+3. **Run tests**:
+   ```bash
+   # Using uv
+   uv run pytest tests/
+
+   # Or with activated venv
+   pytest tests/
+   ```
+
+### Project Standards
+
+- **Code Style**: Follow PEP 8
+- **Type Hints**: Use type annotations where possible
+- **Docstrings**: Google-style docstrings for all public functions
+- **Logging**: Use centralized logger from `src.shared.logging`
+- **Imports**: Use new structure (see MIGRATION_GUIDE.md)
+
+### Database Migrations
+
+To create a new migration:
+
+1. Create file: `src/database/migrations/V###__description.sql`
+2. Use sequential numbering (V001, V002, etc.)
+3. Write idempotent SQL (use `IF NOT EXISTS`)
+4. Test migration on copy of production database
+
+Example:
+```sql
+-- V003__add_user_preferences.sql
+CREATE TABLE IF NOT EXISTS user_preferences (
+    user_id INTEGER PRIMARY KEY,
+    theme TEXT DEFAULT 'light',
+    language TEXT DEFAULT 'de',
+    FOREIGN KEY (user_id) REFERENCES user(id)
+);
+```
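+
+The repository ships a migration runner for these files (exercised by `tests/test_migrations_runner.py`). As a rough sketch of the mechanism, assuming the sequential `V###` ordering above (the `apply_migrations` helper and its `schema_version` bookkeeping table are illustrative, not the project's actual API):
+
+```python
+import re
+import sqlite3
+from pathlib import Path
+
+MIGRATIONS_DIR = Path("src/database/migrations")
+
+
+def apply_migrations(db_path: str) -> None:
+    """Apply pending V###__*.sql scripts in numeric order (sketch)."""
+    conn = sqlite3.connect(db_path)
+    try:
+        conn.execute(
+            "CREATE TABLE IF NOT EXISTS schema_version (version TEXT PRIMARY KEY)"
+        )
+        applied = {row[0] for row in conn.execute("SELECT version FROM schema_version")}
+        # Numeric sort on the V-prefix so V010 runs after V002
+        scripts = sorted(
+            MIGRATIONS_DIR.glob("V*.sql"),
+            key=lambda p: int(re.match(r"V(\d+)", p.stem).group(1)),
+        )
+        for script in scripts:
+            if script.stem in applied:
+                continue  # idempotent SQL is a safety net, not the bookkeeping
+            conn.executescript(script.read_text(encoding="utf-8"))
+            conn.execute("INSERT INTO schema_version VALUES (?)", (script.stem,))
+            conn.commit()
+    finally:
+        conn.close()
+```
+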
+### Adding New Features
+
+**New Service Integration**:
+1. Create module in `src/services/`
+2. Implement client class with proper error handling
+3. Add to `src/services/__init__.py`
+4. Document API requirements
+
+**New Document Parser**:
+1. Create module in `src/parsers/`
+2. Implement parsing function returning core models (see the sketch below)
+3. Add to `src/parsers/__init__.py`
+4. Write unit tests
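+
+A parser module typically boils down to one function from a file path to the core dataclasses. A minimal sketch under these conventions; the RIS format choice and the `ris_to_books` name are illustrative, not an existing module:
+
+```python
+# src/parsers/ris_parser.py (hypothetical example)
+from pathlib import Path
+
+from src.core.models import BookData
+
+
+def ris_to_books(path: str | Path) -> list[BookData]:
+    """Parse a simplified RIS export into BookData models."""
+    books: list[BookData] = []
+    record: dict[str, str] = {}
+    for line in Path(path).read_text(encoding="utf-8").splitlines():
+        if line.startswith("TI  - "):
+            record["title"] = line[6:].strip()
+        elif line.startswith("AU  - "):
+            record["author"] = line[6:].strip()
+        elif line.startswith("ER  -") and record:  # end of record
+            books.append(BookData().from_dict(record))
+            record = {}
+    return books
+```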
+
+**New UI Dialog**:
+1. Design in Qt Designer (`.ui` file)
+2. Convert: `pyside6-uic dialog.ui -o dialog_ui.py`
+3. Create dialog class in `src/ui/dialogs/`
+4. Connect signals to business logic
+
+### Building Documentation
+
+```bash
+# Using uv
+uv run mkdocs build
+uv run mkdocs serve  # View at http://localhost:8000
+
+# Or with activated venv
+mkdocs build
+mkdocs serve
+```
+
+## 📚 Documentation
+
+- **[API Documentation](docs/)**: Detailed module documentation
+- **[User Manual](docs/index.md)**: Complete user guide (MkDocs)
+
+## 🤝 Contributing
+
+Contributions are welcome! Please follow these guidelines:
+
+1. **Fork** the repository
+2. **Create** a feature branch (`git checkout -b feature/amazing-feature`)
+3. **Commit** your changes (`git commit -m 'Add amazing feature'`)
+4. **Push** to the branch (`git push origin feature/amazing-feature`)
+5. **Open** a Pull Request
+
+### Code Review Checklist
+
+- [ ] Code follows project style guidelines
+- [ ] All tests pass
+- [ ] New features have tests
+- [ ] Documentation is updated
+- [ ] No sensitive data in commits
+- [ ] Import paths use new structure
+
+## 📄 License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+## 🙏 Acknowledgments
+
+- **University of Education Freiburg**: Project sponsor and primary user
+- **Qt/PySide6**: Excellent cross-platform GUI framework
+- **Zotero**: Citation management integration
+- **SWB/DNB**: Library catalog services
+
+## 📞 Support
+
+For questions, issues, or feature requests:
+- **Issues**: [Gitea Issues](https://git.theprivateserver.de/PHB/SemesterapparatsManager/issues)
+- **Email**: alexander.kirchner@ph-freiburg.de
+- **Documentation**: [Read the Docs](https://semesterapparatsmanager.readthedocs.io)
+
+## 🗺️ Roadmap
+
+TBD
+
+---
+
+**Built with ❤️ for academic libraries**
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 470172a..536773b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "omegaconf>=2.3.0",
     "openai>=1.79.0",
     "pandas>=2.2.3",
+    "pdfquery>=0.4.3",
     "playwright>=1.49.1",
     "pyramid>=2.0.2",
     "pyside6>=6.9.1",
diff --git a/src/admin/__init__.py b/src/admin/__init__.py
new file mode 100644
index 0000000..5eee246
--- /dev/null
+++ b/src/admin/__init__.py
@@ -0,0 +1,5 @@
+"""Administrative functions and commands."""
+
+from .commands import AdminCommands
+
+__all__ = ["AdminCommands"]
diff --git a/src/admin/commands.py b/src/admin/commands.py
new file mode 100644
index 0000000..bedf8ef
--- /dev/null
+++ b/src/admin/commands.py
@@ -0,0 +1,104 @@
+import hashlib
+import random
+
+from src.database import Database
+from src.shared.logging import log
+from src import LOG_DIR
+
+
+# change passwords for apparats, change passwords for users, list users, create and delete users etc
+# create a class that has all commands. for each command, create a function that does the thing
+class AdminCommands:
+    """Basic Admin commands for the admin console. This class is used to create, delete, and list users. It also has the ability to change passwords for users."""
+
+    def __init__(self, db_path=None):
+        """Default Constructor for the AdminCommands class."""
+        if db_path is None:
+            self.db = Database()
+        else:
+            self.db = Database(db_path=db_path)
+        log.info("AdminCommands initialized with database connection.")
+        log.debug("location: {}", self.db.db_path)
+
+    def create_password(self, password: str) -> tuple[str, str]:
+        """Create a hashed password and a salt for the password.
+
+        Args:
+            password (str): the base password to be hashed.
+
+        Returns:
+            tuple[str,str]: a tuple containing the hashed password and a freshly
+                generated salt. The salt is stored alongside the hash; it is not
+                mixed into the SHA-256 digest itself.
+        """
+        salt = self.create_salt()
+        hashed_password = self.hash_password(password)
+        return (hashed_password, salt)
+
+    def create_salt(self) -> str:
+        """Generate a random 16-character salt for the password.
+
+        Returns:
+            str: the randomized salt
+        """
+        return "".join(
+            random.choices(
+                "abcdefghijklmnopqrstuvwxyzQWERTZUIOPLKJHGFDSAYXCVBNM0123456789", k=16
+            )
+        )
+
+    def create_admin(self):
+        """Create the admin in the database. This is only used once, when the database is created."""
+        salt = self.create_salt()
+        hashed_password = self.hash_password("admin")
+        self.db.createUser("admin", salt + hashed_password, "admin", salt)
+
+    def create_user(self, username: str, password: str, role: str = "user") -> bool:
+        """Create a new user in the database.
+
+        Args:
+            username (str): the username of the user to be created.
+            password (str): the password of the user to be created.
+            role (str, optional): the role of the user to be created. Defaults to "user".
+
+        Returns:
+            bool: the creation status reported by the database layer.
+        """
+        hashed_password, salt = self.create_password(password)
+        status = self.db.createUser(
+            user=username, password=salt + hashed_password, role=role, salt=salt
+        )
+        return status
+
+    def hash_password(self, password: str) -> str:
+        """Hash a password using SHA256.
+
+        Args:
+            password (str): the password to be hashed.
+
+        Returns:
+            str: the hashed password.
+        """
+        hashed = hashlib.sha256((password).encode("utf-8")).hexdigest()
+        return hashed
+
+    def list_users(self) -> list[tuple]:
+        """List all available users in the database.
+
+        Returns:
+            list[tuple]: a list of all users, containing all stored data for each user in a tuple.
+        """
+        return self.db.getUsers()
+
+    def delete_user(self, username: str):
+        """Delete a selected user from the database.
+
+        Args:
+            username (str): the username of the user to be deleted.
+        """
+        self.db.deleteUser(username)
+
+    def change_password(self, username, password):
+        """Change the password for a user.
+
+        Args:
+            username (str): username of the user to change the password for.
+            password (str): the new, non-hashed password to change to.
+        """
+        hashed_password = self.hash_password(password)
+        self.db.changePassword(username, hashed_password)
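
A note on the scheme above: `hash_password` computes a plain SHA-256 digest, so the generated salt is stored next to the hash but never enters it (and `create_user` stores `salt + hash`, while `change_password` stores the bare hash). If this scheme is ever revisited, a salted key-derivation function would look roughly like the sketch below; it is illustrative only and not drop-in compatible with existing database rows:

```python
import hashlib
import os


def hash_password(password: str, salt: bytes) -> str:
    """Derive a password hash that actually incorporates the salt (PBKDF2)."""
    digest = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 600_000)
    return digest.hex()


def create_password(password: str) -> tuple[str, str]:
    salt = os.urandom(16)  # random bytes instead of random.choices
    return hash_password(password, salt), salt.hex()


# Verification recomputes the digest with the stored salt:
#   stored_hash == hash_password(candidate, bytes.fromhex(stored_salt))
```
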
diff --git a/src/background/__init__.py b/src/background/__init__.py
new file mode 100644
index 0000000..08935ad
--- /dev/null
+++ b/src/background/__init__.py
@@ -0,0 +1,16 @@
+"""Background tasks and threading operations."""
+
+from .autoadder import AutoAdder
+from .availability_checker import AvailChecker
+from .book_grabber import BookGrabber, BookGrabberTest
+from .new_editions import NewEditionCheckerThread
+from .documentation_server import DocumentationThread
+
+__all__ = [
+    "AutoAdder",
+    "AvailChecker",
+    "BookGrabber",
+    "BookGrabberTest",
+    "NewEditionCheckerThread",
+    "DocumentationThread",
+]
diff --git a/src/background/autoadder.py b/src/background/autoadder.py
new file mode 100644
index 0000000..d5863b7
--- /dev/null
+++ b/src/background/autoadder.py
@@ -0,0 +1,59 @@
+import sys
+import time
+
+import loguru
+
+# from icecream import ic
+from PySide6.QtCore import QThread
+from PySide6.QtCore import Signal as Signal
+
+from src import LOG_DIR
+from src.database import Database
+
+log = loguru.logger
+log.remove()
+log.add(sys.stdout, level="INFO")
+log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
+
+
+# from src.transformers import RDS_AVAIL_DATA
+
+
+class AutoAdder(QThread):
+    updateSignal = Signal(int)
+
+    setTextSignal = Signal(int)
+    progress = Signal(int)
+
+    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
+        super().__init__(parent)
+        self.data = data
+        self.app_id = app_id
+        self.prof_id = prof_id
+
+        # #print("Launched AutoAdder")
+        # #print(self.data, self.app_id, self.prof_id)
+
+    def run(self):
+        self.db = Database()
+        # show the dialog, start the thread to gather data and dynamically update progressbar and listwidget
+        log.info("Starting worker thread")
+        item = 0
+        for entry in self.data:
+            try:
+                self.updateSignal.emit(item)
+                self.setTextSignal.emit(entry)
+                item += 1
+                self.progress.emit(item)
+                time.sleep(1)
+
+            except Exception as e:
+                # #print(e)
+                log.exception(
+                    f"The query failed with message {e} for signature {entry}"
+                )
+                continue
+        if item == len(self.data):
+            log.info("Worker thread finished")
+            # terminate thread
+            self.finished.emit()
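
The background workers in this package follow the usual Qt pattern: construct the thread, connect its signals to UI slots, then call `start()`. A minimal wiring sketch for `AutoAdder`, assuming a `QProgressDialog`-style consumer (the `launch_autoadder` helper and its slot wiring are illustrative, not code from this repository):

```python
from PySide6.QtWidgets import QProgressDialog

from src.background import AutoAdder


def launch_autoadder(parent, signatures, app_id, prof_id):
    dialog = QProgressDialog("Adding books...", "Cancel", 0, len(signatures), parent)
    worker = AutoAdder(data=signatures, app_id=app_id, prof_id=prof_id, parent=parent)
    # Qt queues these emissions back onto the GUI thread for us
    worker.progress.connect(dialog.setValue)
    worker.finished.connect(dialog.close)
    worker.start()  # runs AutoAdder.run() in its own thread
    return worker
```
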
+ ) + # Pre-create reusable request and transformer to avoid per-item overhead + self._request = WebRequest().set_apparat(self.appnumber) + self._rds_transformer = BibTextTransformer(TransformerType.RDS) + + def run(self): + self.db = Database() + state = 0 + count = 0 + for link in self.links: + log.info("Processing entry: " + str(link)) + data = self._request.get_ppn(link).get_data() + rds = self._rds_transformer.get_data(data).return_data("rds_availability") + + book_id = None + if not rds or not rds.items: + log.warning(f"No RDS data found for link {link}") + continue + for item in rds.items: + sign = item.superlocation + loc = item.location + # # #print(item.location) + if str(self.appnumber) in sign or str(self.appnumber) in loc: + state = 1 + break + for book in self.books: + if book["bookdata"].signature == link: + book_id = book["id"] + break + log.info(f"State of {link}: " + str(state)) + # #print("Updating availability of " + str(book_id) + " to " + str(state)) + # use get_book_medianr to update the medianr of the book in the database + auth = self.db.getWebADISAuth + medianr = get_book_medianr(rds.items[0].callnumber, self.appnumber, auth) + book_data = book["bookdata"] + book_data.medianr = medianr + self.db.updateBookdata(book["id"], book_data) + self.db.setAvailability(book_id, state) + count += 1 + self.updateProgress.emit(count, len(self.links)) + self.updateSignal.emit(item.callnumber, state) + + log.info("Worker thread finished") + # teminate thread + + self.quit() diff --git a/src/background/book_grabber.py b/src/background/book_grabber.py new file mode 100644 index 0000000..a134d88 --- /dev/null +++ b/src/background/book_grabber.py @@ -0,0 +1,199 @@ +from PySide6.QtCore import QThread, Signal + +from src.database import Database +from src.services.webrequest import BibTextTransformer, WebRequest +from src.shared.logging import log + +# Logger configured centrally in main; this module just uses `log` + + +class BookGrabber(QThread): + updateSignal = Signal(int, int) + done = Signal() + + def __init__(self): + super(BookGrabber, self).__init__(parent=None) + self.is_Running = True + log.info("Starting worker thread") + self.data = [] + self.app_id = None + self.prof_id = None + self.mode = None + self.book_id = None + self.use_any = False + self.use_exact = False + self.app_nr = None + self.tstate = (self.app_id, self.prof_id, self.mode, self.data) + self.request = WebRequest() + self.db = Database() + + def add_values( + self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False + ): + self.app_id = app_id + self.prof_id = prof_id + self.mode = mode + self.data: list[str] = data + self.use_any = any_book + self.use_exact = exact + log.info(f"Working on {len(self.data)} entries") + self.tstate = (self.app_nr, self.prof_id, self.mode, self.data) + log.debug("State: " + str(self.tstate)) + app_nr = self.db.query_db( + "SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,) + )[0][0] + self.request.set_apparat(app_nr) + # log.debug(self.tstate) + + def run(self): + item = 0 + iterdata = self.data + # log.debug(iterdata) + + for entry in iterdata: + # log.debug(entry) + log.info("Processing entry: {}", entry) + + webdata = self.request.get_ppn(entry) + if self.use_any: + webdata = webdata.use_any_book + webdata = webdata.get_data() + + if webdata == "error": + continue + + bd = BibTextTransformer(self.mode) + log.debug(webdata) + if self.mode == "ARRAY": + if self.use_exact: + bd = bd.use_signature(entry) + bd = bd.get_data(webdata).return_data() 
+ log.debug(bd) + if bd is None: + # bd = BookData + continue + bd.signature = entry + transformer = ( + BibTextTransformer("RDS").get_data(webdata).return_data("rds_data") + ) + + # confirm lock is acquired + self.db.addBookToDatabase(bd, self.app_id, self.prof_id) + # get latest book id + self.book_id = self.db.getLastBookId() + log.info("Added book to database") + state = 0 + for result in transformer.RDS_DATA: + # log.debug(result.RDS_LOCATION) + if str(self.app_nr) in result.RDS_LOCATION: + state = 1 + break + + log.info(f"State of {entry}: {state}") + log.debug( + "updating availability of " + str(self.book_id) + " to " + str(state) + ) + try: + self.db.setAvailability(self.book_id, state) + log.debug("Added book to database") + except Exception as e: + log.error(f"Failed to update availability: {e}") + log.debug("Failed to update availability: " + str(e)) + + # time.sleep(5) + item += 1 + self.updateSignal.emit(item, len(self.data)) + log.info("Worker thread finished") + # self.done.emit() + self.quit() + + def stop(self): + self.is_Running = False + + +class BookGrabberTest(QThread): + updateSignal = Signal(int, int) + done = Signal() + + def __init__(self, appnr: int): + super(BookGrabberTest, self).__init__(parent=None) + self.is_Running = True + log.info("Starting worker thread") + self.data = None + self.app_nr = None + self.prof_id = None + self.mode = None + self.book_id = None + self.use_any = False + self.use_exact = False + self.app_nr = appnr + self.tstate = (self.app_nr, self.prof_id, self.mode, self.data) + self.results = [] + + def add_values( + self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False + ): + self.app_nr = app_nr + self.prof_id = prof_id + self.mode = mode + self.data = data + self.use_any = any_book + self.use_exact = exact + log.info(f"Working on {len(self.data)} entries") + self.tstate = (self.app_nr, self.prof_id, self.mode, self.data) + log.debug("State: " + str(self.tstate)) + # log.debug(self.tstate) + + def run(self): + item = 0 + iterdata = self.data + # log.debug(iterdata) + for entry in iterdata: + # log.debug(entry) + signature = str(entry) + log.info("Processing entry: " + signature) + + webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry) + if self.use_any: + webdata = webdata.use_any_book + webdata = webdata.get_data() + + if webdata == "error": + continue + + bd = BibTextTransformer(self.mode) + if self.mode == "ARRAY": + if self.use_exact: + bd = bd.use_signature(entry) + bd = bd.get_data(webdata).return_data() + if bd is None: + # bd = BookData + continue + bd.signature = entry + transformer = ( + BibTextTransformer("RDS").get_data(webdata).return_data("rds_data") + ) + + # confirm lock is acquired + # get latest book id + log.info("Added book to database") + state = 0 + for result in transformer.RDS_DATA: + # log.debug(result.RDS_LOCATION) + if str(self.app_nr) in result.RDS_LOCATION: + state = 1 + break + + log.info(f"State of {signature}: {state}") + # log.debug("updating availability of " + str(self.book_id) + " to " + str(state)) + self.results.append(bd) + + # time.sleep(5) + item += 1 + self.updateSignal.emit(item, len(self.data)) + log.info("Worker thread finished") + # self.done.emit() + self.quit() + + def stop(self): + self.is_Running = False diff --git a/src/background/documentation_server.py b/src/background/documentation_server.py new file mode 100644 index 0000000..57cffcb --- /dev/null +++ b/src/background/documentation_server.py @@ -0,0 +1,23 @@ +from PySide6.QtCore import QThread, Slot 
+from src.utils.documentation import website, QuietHandler
+from wsgiref.simple_server import make_server
+
+
+class DocumentationThread(QThread):
+    def __init__(self):
+        super().__init__()
+        self._server = None  # store server so we can shut it down
+
+    def run(self):
+        # launch_documentation()
+        self._server = make_server(
+            "localhost", 8000, website(), handler_class=QuietHandler
+        )
+        while not self.isInterruptionRequested():
+            self._server.handle_request()
+
+    @Slot()  # slot you can connect to aboutToQuit
+    def stop(self):
+        self.requestInterruption()  # ask the loop above to exit
+        if self._server:
+            self._server.shutdown()  # unblock handle_request()
\ No newline at end of file
diff --git a/src/background/new_editions.py b/src/background/new_editions.py
new file mode 100644
index 0000000..05245a4
--- /dev/null
+++ b/src/background/new_editions.py
@@ -0,0 +1,345 @@
+import os
+import re
+from concurrent.futures import ThreadPoolExecutor
+from math import ceil
+from queue import Empty, Queue
+from time import monotonic  # <-- NEW
+from typing import List, Optional
+
+from PySide6.QtCore import QThread, Signal
+
+# from src.services.webrequest import BibTextTransformer, WebRequest
+from src.services.catalogue import Catalogue
+from src.core.models import BookData
+from src.services.sru import SWB
+from src.shared.logging import log
+
+# use all available cores - 2, but at least 1 (os.cpu_count() may return None)
+THREAD_COUNT = max((os.cpu_count() or 1) - 2, 1)
+THREAD_MIN_ITEMS = 5
+
+# Logger configured centrally in main; use shared `log`
+
+swb = SWB()
+dnb = SWB()
+cat = Catalogue()
+
+RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative RVK character set
+
+
+def find_newer_edition(
+    swb_result: BookData, dnb_result: List[BookData]
+) -> Optional[List[BookData]]:
+    """
+    New edition if:
+      - year > swb.year OR
+      - edition_number > swb.edition_number
+    BUT: discard any candidate with year < swb.year (if both years are known).
+
+    Same-work check:
+      - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
+      - If both have signatures and RVKs differ -> skip.
+ + Preferences (in order): + 1) RVK matches SWB + 2) Print over Online-Ressource + 3) Has signature + 4) Newer: (year desc, edition_number desc) + """ + + def strip_copy_and_edition(s: str) -> str: + s = re.sub(r"\(\s*\d+\s*\)", "", s) # remove '(N)' + s = re.sub(r"\s*\+\s*\d+\s*$", "", s) # remove trailing '+N' + return s + + def extract_rvk_root(sig: Optional[str]) -> str: + if not sig: + return "" + t = strip_copy_and_edition(sig.upper()) + t = re.sub(r"\s+", " ", t).strip() + m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t) + if not m: + cleaned = re.sub(rf"[^{RVK_ALLOWED} ]+", "", t).strip() + return cleaned.split(" ")[0] if cleaned else "" + return re.sub(r"\s+", " ", m.group(1)).strip() + + def has_sig(b: BookData) -> bool: + return bool(getattr(b, "signature", None)) + + def is_online(b: BookData) -> bool: + return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource" + + def is_print(b: BookData) -> bool: + return not is_online(b) + + def rvk_matches_swb(b: BookData) -> bool: + if not has_sig(b) or not has_sig(swb_result): + return False + return extract_rvk_root(b.signature) == extract_rvk_root(swb_result.signature) + + def strictly_newer(b: BookData) -> bool: + # Hard guard: if both years are known and candidate is older, discard + if ( + b.year is not None + and swb_result.year is not None + and b.year < swb_result.year + ): + return False + + newer_by_year = ( + b.year is not None + and swb_result.year is not None + and b.year > swb_result.year + ) + newer_by_edition = ( + b.edition_number is not None + and swb_result.edition_number is not None + and b.edition_number > swb_result.edition_number + ) + # Thanks to the guard above, newer_by_edition can't pick something with a smaller year. + return newer_by_year or newer_by_edition + + swb_has_sig = has_sig(swb_result) + swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None)) + + # 1) Filter: same work (by RVK if both have sigs) AND strictly newer + candidates: List[BookData] = [] + for b in dnb_result: + if has_sig(b) and swb_has_sig: + if extract_rvk_root(b.signature) != swb_rvk: + continue # different work + if strictly_newer(b): + candidates.append(b) + + if not candidates: + return None + + # 2) Dedupe by PPN → prefer (rvk-match, is-print, has-signature) + def pref_score(x: BookData) -> tuple[int, int, int]: + return ( + 1 if rvk_matches_swb(x) else 0, + 1 if is_print(x) else 0, + 1 if has_sig(x) else 0, + ) + + by_ppn: dict[Optional[str], BookData] = {} + for b in candidates: + key = getattr(b, "ppn", None) + prev = by_ppn.get(key) + if prev is None or pref_score(b) > pref_score(prev): + by_ppn[key] = b + + deduped = list(by_ppn.values()) + if not deduped: + return None + + # 3) Preserve all qualifying newer editions, but order by preference + def sort_key(b: BookData): + year = b.year if b.year is not None else -1 + ed = b.edition_number if b.edition_number is not None else -1 + return ( + 1 if rvk_matches_swb(b) else 0, + 1 if is_print(b) else 0, + 1 if has_sig(b) else 0, + year, + ed, + ) + + deduped.sort(key=sort_key, reverse=True) + return deduped + + +class NewEditionCheckerThread(QThread): + updateSignal = Signal(int, int) # (processed, total) + updateProgress = Signal(int, int) # (processed, total) + total_entries_signal = Signal(int) + resultsSignal = Signal(list) # list[tuple[BookData, list[BookData]]] + + # NEW: metrics signals + rateSignal = Signal(float) # items per second ("it/s") + etaSignal = Signal(int) # seconds remaining (-1 when unknown) + + def __init__(self, entries: 
Optional[list["BookData"]] = None, parent=None): + super().__init__(parent) + self.entries: list["BookData"] = entries if entries is not None else [] + self.results: list[tuple["BookData", list["BookData"]]] = [] + + def reset(self): + self.entries = [] + self.results = [] + + # ---------- internal helpers ---------- + + @staticmethod + def _split_evenly(items: list, parts: int) -> list[list]: + """Split items as evenly as possible into `parts` chunks (no empty tails).""" + if parts <= 1 or len(items) <= 1: + return [items] + n = len(items) + base = n // parts + extra = n % parts + chunks = [] + i = 0 + for k in range(parts): + size = base + (1 if k < extra else 0) + if size == 0: + continue + chunks.append(items[i : i + size]) + i += size + return chunks + + @staticmethod + def _clean_title(raw: str) -> str: + title = raw.rstrip(" .:,;!?") + title = re.sub(r"\s*\(.*\)", "", title) + return title.strip() + + @classmethod + def _process_book( + cls, book: "BookData" + ) -> tuple["BookData", list["BookData"]] | None: + """Process one book; returns (original, [found editions]) or None on failure.""" + if not book.title: + return None + response: list["BookData"] = [] + query = [ + f"pica.tit={book.title}", + f"pica.vlg={book.publisher}", + ] + + swb_result = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])[0] + dnb_results = swb.getBooks(query) + new_editions = find_newer_edition(swb_result, dnb_results) + + if new_editions is not None: + for new_edition in new_editions: + new_edition.library_location = cat.get_location(new_edition.ppn) + try: + isbn = ( + str(new_edition.isbn[0]) + if isinstance(new_edition.isbn, list) + else str(new_edition.isbn) + ) + new_edition.link = ( + f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}" + ) + except (IndexError, TypeError): + isbn = None + new_edition.in_library = cat.in_library(new_edition.ppn) + response = new_editions + + # client = SWB() + # response: list["BookData"] = [] + # # First, search by title only + # results = client.getBooks([f"pica.title={title}", f"pica.vlg={book.publisher}"]) + + # lehmanns = LehmannsClient() + # results = lehmanns.search_by_title(title) + # for result in results: + # if "(eBook)" in result.title: + # result.title = result.title.replace("(eBook)", "").strip() + # swb_results = client.getBooks( + # [ + # f"pica.tit={result.title}", + # f"pica.vlg={result.publisher.split(',')[0]}", + # ] + # ) + # for swb in swb_results: + # if swb.isbn == result.isbn: + # result.ppn = swb.ppn + # result.signature = swb.signature + # response.append(result) + # if (result.edition_number < swb.edition_number) and ( + # swb.year > result.year + # ): + # response.append(result) + if response == []: + return None + # Remove duplicates based on ppn + return (book, response) + + @classmethod + def _worker(cls, items: list["BookData"], q: Queue) -> None: + """Worker for one chunk; pushes ('result', ...), ('progress', 1), and ('done', None).""" + try: + for book in items: + try: + result = cls._process_book(book) + except Exception: + result = None + if result is not None: + q.put(("result", result)) + q.put(("progress", 1)) + finally: + q.put(("done", None)) + + # ---------- thread entry point ---------- + + def run(self): + total = len(self.entries) + self.total_entries_signal.emit(total) + + # start timer for metrics + t0 = monotonic() + + if total == 0: + log.debug("No entries to process.") + # emit metrics (zero work) + self.rateSignal.emit(0.0) + self.etaSignal.emit(0) + self.resultsSignal.emit([]) + return + + # Up 
to THREAD_COUNT workers; at least THREAD_MIN_ITEMS items per worker
+        num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
+        chunks = self._split_evenly(self.entries, num_workers)
+        sizes = [len(ch) for ch in chunks]
+
+        q: Queue = Queue()
+        processed = 0
+        finished_workers = 0
+
+        with ThreadPoolExecutor(max_workers=len(chunks)) as ex:
+            futures = [ex.submit(self._worker, ch, q) for ch in chunks]
+
+            log.info(
+                f"Launched {len(futures)} worker thread(s) for {total} entries: {sizes} entries per thread."
+            )
+            for idx, sz in enumerate(sizes, 1):
+                log.debug(f"Thread {idx}: {sz} entries")
+
+            # Aggregate progress/results
+            while finished_workers < len(chunks):
+                try:
+                    kind, payload = q.get(timeout=0.1)
+                except Empty:
+                    continue
+
+                if kind == "progress":
+                    processed += int(payload)
+                    self.updateSignal.emit(processed, total)
+                    self.updateProgress.emit(processed, total)
+
+                    # ---- NEW: compute & emit metrics ----
+                    elapsed = max(1e-9, monotonic() - t0)
+                    rate = processed / elapsed  # items per second
+                    remaining = max(0, total - processed)
+                    eta_sec = int(round(remaining / rate)) if rate > 0 else -1
+
+                    self.rateSignal.emit(rate)
+                    # clamp negative just in case
+                    self.etaSignal.emit(max(0, eta_sec) if eta_sec >= 0 else -1)
+                    # -------------------------------------
+
+                elif kind == "result":
+                    self.results.append(payload)
+                elif kind == "done":
+                    finished_workers += 1
+
+        # Final metrics on completion
+        elapsed_total = max(1e-9, monotonic() - t0)
+        final_rate = total / elapsed_total
+        self.rateSignal.emit(final_rate)
+        self.etaSignal.emit(0)
+
+        self.resultsSignal.emit(self.results)
diff --git a/src/core/__init__.py b/src/core/__init__.py
new file mode 100644
index 0000000..15185c3
--- /dev/null
+++ b/src/core/__init__.py
@@ -0,0 +1,30 @@
+"""Core domain models and business constants."""
+
+from .models import (
+    Apparat,
+    ApparatData,
+    Book,
+    BookData,
+    ELSA,
+    MailData,
+    Prof,
+    SemapDocument,
+    Subjects,
+    XMLMailSubmission,
+)
+from .constants import *
+from .semester import Semester
+
+__all__ = [
+    "Apparat",
+    "ApparatData",
+    "Book",
+    "BookData",
+    "ELSA",
+    "MailData",
+    "Prof",
+    "SemapDocument",
+    "Subjects",
+    "XMLMailSubmission",
+    "Semester",
+]
diff --git a/src/core/constants.py b/src/core/constants.py
new file mode 100644
index 0000000..94f0916
--- /dev/null
+++ b/src/core/constants.py
@@ -0,0 +1,213 @@
+APP_NRS = [i for i in range(1, 181)]
+
+PROF_TITLES = [
+    "Dr. mult.",
+    "Dr. paed.",
+    "Dr. rer. pol.",
+    "Dr. sc. techn.",
+    "Drs.",
+    "Dr. agr.",
+    "Dr. habil.",
+    "Dr. oec.",
+    "Dr. med.",
+    "Dr. e. h.",
+    "Dr. oec. publ.",
+    "Dr. -Ing.",
+    "Dr. theol.",
+    "Dr. med. vet.",
+    "Dr. ing.",
+    "Dr. rer. nat.",
+    "Dr. des.",
+    "Dr. sc. mus.",
+    "Dr. h. c.",
+    "Dr. pharm.",
+    "Dr. med. dent.",
+    "Dr. phil. nat.",
+    "Dr. phil.",
+    "Dr. 
iur.", + "Dr.", + "Kein Titel", +] + +SEMAP_MEDIA_ACCOUNTS = { + 1: "1008000055", + 2: "1008000188", + 3: "1008000211", + 4: "1008000344", + 5: "1008000477", + 6: "1008000500", + 7: "1008000633", + 8: "1008000766", + 9: "1008000899", + 10: "1008000922", + 11: "1008001044", + 12: "1008001177", + 13: "1008001200", + 14: "1008001333", + 15: "1008001466", + 16: "1008001599", + 17: "1008001622", + 18: "1008001755", + 19: "1008001888", + 20: "1008001911", + 21: "1008002033", + 22: "1008002166", + 23: "1008002299", + 24: "1008002322", + 25: "1008002455", + 26: "1008002588", + 27: "1008002611", + 28: "1008002744", + 29: "1008002877", + 30: "1008002900", + 31: "1008003022", + 32: "1008003155", + 33: "1008003288", + 34: "1008003311", + 35: "1008003444", + 36: "1008003577", + 37: "1008003600", + 38: "1008003733", + 39: "1008003866", + 40: "1008003999", + 41: "1008004011", + 42: "1008004144", + 43: "1008004277", + 44: "1008004300", + 45: "1008004433", + 46: "1008004566", + 47: "1008004699", + 48: "1008004722", + 49: "1008004855", + 50: "1008004988", + 51: "1008005000", + 52: "1008005133", + 53: "1008005266", + 54: "1008005399", + 55: "1008005422", + 56: "1008005555", + 57: "1008005688", + 58: "1008005711", + 59: "1008005844", + 60: "1008005977", + 61: "1008006099", + 62: "1008006122", + 63: "1008006255", + 64: "1008006388", + 65: "1008006411", + 66: "1008006544", + 67: "1008006677", + 68: "1008006700", + 69: "1008006833", + 70: "1008006966", + 71: "1008007088", + 72: "1008007111", + 73: "1008007244", + 74: "1008007377", + 75: "1008007400", + 76: "1008007533", + 77: "1008007666", + 78: "1008007799", + 79: "1008007822", + 80: "1008007955", + 81: "1008008077", + 82: "1008008100", + 83: "1008008233", + 84: "1008008366", + 85: "1008008499", + 86: "1008008522", + 87: "1008008655", + 88: "1008008788", + 89: "1008008811", + 90: "1008008944", + 91: "1008009066", + 92: "1008009199", + 93: "1008009222", + 94: "1008009355", + 95: "1008009488", + 96: "1008009511", + 97: "1008009644", + 98: "1008009777", + 99: "1008009800", + 100: "1008009933", + 101: "1008010022", + 102: "1008010155", + 103: "1008010288", + 104: "1008010311", + 105: "1008010444", + 106: "1008010577", + 107: "1008010600", + 108: "1008010733", + 109: "1008010866", + 110: "1008010999", + 111: "1008011011", + 112: "1008011144", + 113: "1008011277", + 114: "1008011300", + 115: "1008011433", + 116: "1008011566", + 117: "1008011699", + 118: "1008011722", + 119: "1008011855", + 120: "1008011988", + 121: "1008012000", + 122: "1008012133", + 123: "1008012266", + 124: "1008012399", + 125: "1008012422", + 126: "1008012555", + 127: "1008012688", + 128: "1008012711", + 129: "1008012844", + 130: "1008012977", + 131: "1008013099", + 132: "1008013122", + 133: "1008013255", + 134: "1008013388", + 135: "1008013411", + 136: "1008013544", + 137: "1008013677", + 138: "1008013700", + 139: "1008013833", + 140: "1008013966", + 141: "1008014088", + 142: "1008014111", + 143: "1008014244", + 144: "1008014377", + 145: "1008014400", + 146: "1008014533", + 147: "1008014666", + 148: "1008014799", + 149: "1008014822", + 150: "1008014955", + 151: "1008015077", + 152: "1008015100", + 153: "1008015233", + 154: "1008015366", + 155: "1008015499", + 156: "1008015522", + 157: "1008015655", + 158: "1008015788", + 159: "1008015811", + 160: "1008015944", + 161: "1008016066", + 162: "1008016199", + 163: "1008016222", + 164: "1008016355", + 165: "1008016488", + 166: "1008016511", + 167: "1008016644", + 168: "1008016777", + 169: "1008016800", + 170: "1008016933", + 171: "1008017055", + 172: 
"1008017188", + 173: "1008017211", + 174: "1008017344", + 175: "1008017477", + 176: "1008017500", + 177: "1008017633", + 178: "1008017766", + 179: "1008017899", + 180: "1008017922", +} diff --git a/src/core/models.py b/src/core/models.py new file mode 100644 index 0000000..a7d4688 --- /dev/null +++ b/src/core/models.py @@ -0,0 +1,410 @@ +import json +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Optional, Union + +import regex + +from src.logic.openai import name_tester, run_shortener, semester_converter +from src.logic.semester import Semester + + +@dataclass +class Prof: + id: Optional[int] = None + _title: Optional[str] = None + firstname: Optional[str] = None + lastname: Optional[str] = None + fullname: Optional[str] = None + mail: Optional[str] = None + telnr: Optional[str] = None + + # add function that sets the data based on a dict + def from_dict(self, data: dict[str, Union[str, int]]): + for key, value in data.items(): + if hasattr(self, key): + setattr(self, key, value) + return self + + @property + def title(self) -> str: + if self._title is None or self._title == "None": + return "" + return self._title + + @title.setter + def title(self, value: str): + self._title = value + + # add function that sets the data from a tuple + def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof": + setattr(self, "id", data[0]) + setattr(self, "_title", data[1]) + setattr(self, "firstname", data[2]) + setattr(self, "lastname", data[3]) + setattr(self, "fullname", data[4]) + setattr(self, "mail", data[5]) + setattr(self, "telnr", data[6]) + return self + + def name(self, comma: bool = False) -> Optional[str]: + if self.firstname is None and self.lastname is None: + if "," in self.fullname: + self.firstname = self.fullname.split(",")[1].strip() + self.lastname = self.fullname.split(",")[0].strip() + else: + return self.fullname + + if comma: + return f"{self.lastname}, {self.firstname}" + return f"{self.lastname} {self.firstname}" + + +@dataclass +class BookData: + ppn: str | None = None + title: str | None = None + signature: str | None = None + edition: str | None = None + link: str | None = None + isbn: Union[str, list[str], None] = field(default_factory=list) + author: str | None = None + language: Union[str, list[str], None] = field(default_factory=list) + publisher: str | None = None + place: str | None = None + year: int | None = None + pages: str | None = None + library_location: str | None = None + in_apparat: bool | None = False + adis_idn: str | None = None + old_book: Any | None = None + media_type: str | None = None # + in_library: bool | None = None # whether the book is in the library or not + medianr: int | None = None # Media number in the library system + + def __post_init__(self): + self.library_location = ( + str(self.library_location) if self.library_location else None + ) + if isinstance(self.language, list) and self.language: + self.language = [lang.strip() for lang in self.language if lang.strip()] + self.language = ",".join(self.language) + self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None + self.in_library = True if self.signature else False + + def from_dict(self, data: dict) -> "BookData": + for key, value in data.items(): + setattr(self, key, value) + return self + + def merge(self, other: "BookData") -> "BookData": + for key, value in other.__dict__.items(): + # merge lists, if the attribute is a list, extend it + if isinstance(value, list): + current_value = getattr(self, key) + if 
current_value is None: + current_value = [] + elif not isinstance(current_value, list): + current_value = [current_value] + # extend the list with the new values, but only if they are not already in the list + for v in value: + if v not in current_value: + current_value.append(v) + setattr(self, key, current_value) + if value is not None and ( + getattr(self, key) is None or getattr(self, key) == "" + ): + setattr(self, key, value) + # in language, drop all entries that are longer than 3 characters + if isinstance(self.language, list): + self.language = [lang for lang in self.language if len(lang) <= 4] + return self + + @property + def to_dict(self) -> str: + """Convert the dataclass to a dictionary.""" + data_dict = { + key: value for key, value in self.__dict__.items() if value is not None + } + # remove old_book from data_dict + if "old_book" in data_dict: + del data_dict["old_book"] + return json.dumps(data_dict, ensure_ascii=False) + + def from_dataclass(self, dataclass: Optional[Any]) -> None: + if dataclass is None: + return + for key, value in dataclass.__dict__.items(): + setattr(self, key, value) + + def get_book_type(self) -> str: + if "Online" in self.pages: + return "eBook" + else: + return "Druckausgabe" + + def from_string(self, data: str) -> "BookData": + ndata = json.loads(data) + + return BookData(**ndata) + + def from_LehmannsSearchResult(self, result: Any) -> "BookData": + self.title = result.title + self.author = "; ".join(result.authors) if result.authors else None + self.edition = str(result.edition) if result.edition else None + self.link = result.url + self.isbn = ( + result.isbn13 + if isinstance(result.isbn13, list) + else [result.isbn13] + if result.isbn13 + else [] + ) + self.pages = str(result.pages) if result.pages else None + self.publisher = result.publisher + self.year = str(result.year) if result.year else None + # self.pages = str(result.pages) if result.pages else None + return self + + @property + def edition_number(self) -> Optional[int]: + if self.edition is None: + return 0 + match = regex.search(r"(\d+)", self.edition) + if match: + return int(match.group(1)) + return 0 + + +@dataclass +class MailData: + subject: Optional[str] = None + body: Optional[str] = None + mailto: Optional[str] = None + prof: Optional[str] = None + + +class Subjects(Enum): + BIOLOGY = (1, "Biologie") + CHEMISTRY = (2, "Chemie") + GERMAN = (3, "Deutsch") + ENGLISH = (4, "Englisch") + PEDAGOGY = (5, "Erziehungswissenschaft") + FRENCH = (6, "Französisch") + GEOGRAPHY = (7, "Geographie") + HISTORY = (8, "Geschichte") + HEALTH_EDUCATION = (9, "Gesundheitspädagogik") + HTW = (10, "Haushalt / Textil") + ART = (11, "Kunst") + MATH_IT = (12, "Mathematik / Informatik") + MEDIAPEDAGOGY = (13, "Medien in der Bildung") + MUSIC = (14, "Musik") + PHILOSOPHY = (15, "Philosophie") + PHYSICS = (16, "Physik") + POLITICS = (17, "Politikwissenschaft") + PRORECTORATE = (18, "Prorektorat Lehre und Studium") + PSYCHOLOGY = (19, "Psychologie") + SOCIOLOGY = (20, "Soziologie") + SPORT = (21, "Sport") + TECHNIC = (22, "Technik") + THEOLOGY = (23, "Theologie") + ECONOMICS = (24, "Wirtschaftslehre") + + @property + def id(self) -> int: + return self.value[0] + + @property + def name(self) -> str: + return self.value[1] + + @classmethod + def get_index(cls, name: str) -> Optional[int]: + for i in cls: + if i.name == name: + return i.id - 1 + return None + + +@dataclass +class Apparat: + id: int | None = None + name: str | None = None + prof_id: int | None = None + subject: str | None = None + appnr: 
int | None = None + created_semester: str | None = None + extended_at: str | None = None + eternal: bool = False + extend_until: str | None = None + deleted: int | None = None + deleted_date: str | None = None + apparat_id_adis: str | None = None + prof_id_adis: str | None = None + konto: int | None = None + + def from_tuple(self, data: tuple[Any, ...]) -> "Apparat": + self.id = data[0] + self.name = data[1] + self.prof_id = data[2] + self.subject = data[3] + self.appnr = data[4] + self.created_semester = data[5] + self.extended_at = data[6] + self.eternal = data[7] + self.extend_until = data[8] + self.deleted = data[9] + self.deleted_date = data[10] + self.apparat_id_adis = data[11] + self.prof_id_adis = data[12] + self.konto = data[13] + return self + + @property + def get_semester(self) -> Optional[str]: + if self.extend_until is not None: + return self.extend_until + else: + return self.created_semester + + +@dataclass +class ELSA: + id: int | None = None + date: str | None = None + semester: str | None = None + prof_id: int | None = None + + def from_tuple(self, data: tuple[Any, ...]) -> "ELSA": + self.id = data[0] + self.date = data[1] + self.semester = data[2] + self.prof_id = data[3] + return self + + +@dataclass +class ApparatData: + prof: Prof = field(default_factory=Prof) + apparat: Apparat = field(default_factory=Apparat) + + +@dataclass +class XMLMailSubmission: + name: Optional[str] = None + lastname: Optional[str] = None + title: Optional[str] = None + telno: Optional[int] = None + email: Optional[str] = None + app_name: Optional[str] = None + subject: Optional[str] = None + semester: Optional[Semester] = None + books: Optional[list[BookData]] = None + + +@dataclass +class Book: + author: str = None + year: str = None + edition: str = None + title: str = None + location: str = None + publisher: str = None + signature: str = None + internal_notes: str = None + + @property + def has_signature(self) -> bool: + return self.signature is not None and self.signature != "" + + @property + def is_empty(self) -> bool: + return all( + [ + self.author == "", + self.year == "", + self.edition == "", + self.title == "", + self.location == "", + self.publisher == "", + self.signature == "", + self.internal_notes == "", + ] + ) + + def from_dict(self, data: dict[str, Any]): + for key, value in data.items(): + value = value.strip() + if value == "\u2002\u2002\u2002\u2002\u2002": + value = "" + + if key == "Autorenname(n):Nachname, Vorname": + self.author = value + elif key == "Jahr/Auflage": + self.year = value.split("/")[0] if "/" in value else value + self.edition = value.split("/")[1] if "/" in value else "" + elif key == "Titel": + self.title = value + elif key == "Ort und Verlag": + self.location = value.split(",")[0] if "," in value else value + self.publisher = value.split(",")[1] if "," in value else "" + elif key == "Standnummer": + self.signature = value.strip() + elif key == "Interne Vermerke": + self.internal_notes = value + + +@dataclass +class SemapDocument: + subject: str = None + phoneNumber: int = None + mail: str = None + title: str = None + title_suggestions: list[str] = None + semester: Union[str, Semester] = None + books: list[Book] = None + eternal: bool = False + personName: str = None + personTitle: str = None + title_length = 0 + title_max_length = 0 + + def __post_init__(self): + self.title_suggestions = [] + + @property + def nameSetter(self): + data = name_tester(self.personTitle) + name = f"{data['last_name']}, {data['first_name']}" + if data["title"] is not 
None:
+            self.personTitle = data["title"]
+        self.personName = name
+        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
+        if self.title_length > 40:
+            name_len = len(self.personName.split(",")[0])
+            self.title_max_length = 38 - name_len
+            suggestions = run_shortener(self.title, self.title_max_length)
+            for suggestion in suggestions:
+                self.title_suggestions.append(suggestion["shortened_string"])
+        else:
+            self.title_suggestions = []
+
+    @property
+    def renameSemester(self) -> None:
+        if self.semester:
+            if ", Dauer" in self.semester:
+                self.semester = self.semester.split(",")[0]
+                self.eternal = True
+                self.semester = Semester.from_string(self.semester)
+            else:
+                self.semester = Semester.from_string(
+                    semester_converter(self.semester)
+                )
+
+    @property
+    def signatures(self) -> list[str]:
+        if self.books is not None:
+            return [book.signature for book in self.books if book.has_signature]
+        return []
diff --git a/src/core/semester.py b/src/core/semester.py
new file mode 100644
index 0000000..08e2b03
--- /dev/null
+++ b/src/core/semester.py
@@ -0,0 +1,248 @@
+"""Semester helper class
+
+A small utility around the *German* academic calendar that distinguishes
+between *Wintersemester* (WiSe) and *Sommersemester* (SoSe).
+
+Key points
+----------
+* A **`Semester`** is identified by a *term* ("SoSe" or "WiSe") and the last two
+  digits of the calendar year in which the term *starts*.
+* Formatting **never** pads the year with a leading zero – so ``6`` stays ``6``.
+* ``offset(n)`` and the static ``generate_missing`` reliably walk the timeline
+  one semester at a time with correct year transitions:
+
+      SoSe 6 → **WiSe 6/7** → SoSe 7 → WiSe 7/8 → …
+"""
+
+from __future__ import annotations
+
+import datetime
+import re
+
+from src.shared.logging import log
+
+
+class Semester:
+    """Represents a German university semester (WiSe or SoSe)."""
+
+    # ------------------------------------------------------------------
+    # Class-level defaults – will be *copied* to each instance and then
+    # potentially overwritten in ``__init__``.
+    # ------------------------------------------------------------------
+    _year: int | None = int(str(datetime.datetime.now().year)[2:])  # 2024 → 24
+    _semester: str | None = None  # "WiSe" or "SoSe" – set later
+    _month: int | None = datetime.datetime.now().month
+    value: str | None = None  # Human-readable label, e.g.
"WiSe 23/24" + + # ------------------------------------------------------------------ + # Construction helpers + # ------------------------------------------------------------------ + def __init__( + self, + year: int | None = None, + semester: str | None = None, + month: int | None = None, + ) -> None: + if year is not None: + self._year = int(year) + if semester is not None: + if semester not in ("WiSe", "SoSe"): + raise ValueError("semester must be 'WiSe' or 'SoSe'") + self._semester = semester + if month is not None: + self._month = month + + self.__post_init__() + + def __post_init__(self) -> None: # noqa: D401 – keep original name + if self._year is None: + self._year = int(str(datetime.datetime.now().year)[2:]) + if self._month is None: + self._month = datetime.datetime.now().month + if self._semester is None: + self._generate_semester_from_month() + self._compute_value() + + # ------------------------------------------------------------------ + # Dunder helpers + # ------------------------------------------------------------------ + def __str__(self) -> str: # noqa: D401 – keep original name + return self.value or "" + + def __repr__(self) -> str: # Helpful for debugging lists + return f"Semester({self._year!r}, {self._semester!r})" + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + def _generate_semester_from_month(self) -> None: + """Infer *WiSe* / *SoSe* from the month attribute.""" + self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe" + + def _compute_value(self) -> None: + """Human‑readable semester label – e.g. ``WiSe 23/24`` or ``SoSe 24``.""" + year = self._year + if self._semester == "WiSe": + next_year = (year + 1) % 100 # wrap 99 → 0 + self.value = f"WiSe {year}/{next_year}" + else: # SoSe + self.value = f"SoSe {year}" + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + def offset(self, value: int) -> "Semester": + """Return a new :class:`Semester` *value* steps away. + + The algorithm maps every semester to a monotonically increasing + *linear index* so that simple addition suffices: + + ``index = year * 2 + (0 if SoSe else 1)``. 
+ """ + if not isinstance(value, int): + raise TypeError("value must be an int (number of semesters to jump)") + if value == 0: + return Semester(self._year, self._semester) + + current_idx = self._year * 2 + (0 if self._semester == "SoSe" else 1) + target_idx = current_idx + value + if target_idx < 0: + raise ValueError("offset would result in a negative year – not supported") + + new_year, semester_bit = divmod(target_idx, 2) + new_semester = "SoSe" if semester_bit == 0 else "WiSe" + return Semester(new_year, new_semester) + + # ------------------------------------------------------------------ + # Comparison helpers + # ------------------------------------------------------------------ + def isPastSemester(self, current: "Semester") -> bool: + log.debug(f"Comparing {self} < {current}") + if self.year < current.year: + return True + if self.year == current.year: + return ( + self.semester == "WiSe" and current.semester == "SoSe" + ) # WiSe before next SoSe + return False + + def isFutureSemester(self, current: "Semester") -> bool: + if self.year > current.year: + return True + if self.year == current.year: + return ( + self.semester == "SoSe" and current.semester == "WiSe" + ) # SoSe after WiSe of same year + return False + + def isMatch(self, other: "Semester") -> bool: + return self.year == other.year and self.semester == other.semester + + # ------------------------------------------------------------------ + # Convenience properties + # ------------------------------------------------------------------ + @property + def next(self) -> "Semester": + return self.offset(1) + + @property + def previous(self) -> "Semester": + return self.offset(-1) + + @property + def year(self) -> int: + return self._year + + @property + def semester(self) -> str: + return self._semester + + # ------------------------------------------------------------------ + # Static helpers + # ------------------------------------------------------------------ + @staticmethod + def generate_missing(start: "Semester", end: "Semester") -> list[str]: + """Return all consecutive semesters from *start* to *end* (inclusive).""" + if not isinstance(start, Semester) or not isinstance(end, Semester): + raise TypeError("start and end must be Semester instances") + if start.isFutureSemester(end) and not start.isMatch(end): + raise ValueError("'start' must not be after 'end'") + + chain: list[Semester] = [start.value] + current = start + while not current.isMatch(end): + current = current.next + chain.append(current.value) + if len(chain) > 1000: # sanity guard + raise RuntimeError("generate_missing exceeded sane iteration limit") + return chain + + # ------------------------------------------------------------------ + # Parsing helper + # ------------------------------------------------------------------ + @classmethod + def from_string(cls, s: str) -> "Semester": + """Parse a human‑readable semester label and return a :class:`Semester`. + + Accepted formats (case‑insensitive):: + + "SoSe " → SoSe of year YY + "WiSe /" → Winter term starting in YY + "WiSe " → Shorthand for the above (next year implied) + + ``YY`` may contain a leading zero ("06" → 6). 
+ """ + if not isinstance(s, str): + raise TypeError("s must be a string") + + pattern = r"\s*(WiSe|SoSe)\s+(\d{1,2})(?:\s*/\s*(\d{1,2}))?\s*" + m = re.fullmatch(pattern, s, flags=re.IGNORECASE) + if not m: + raise ValueError( + "invalid semester string format – expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)" + ) + + term_raw, y1_str, y2_str = m.groups() + term = term_raw.capitalize() # normalize case → "WiSe" or "SoSe" + year = int(y1_str.lstrip("0") or "0") # "06" → 6, "0" stays 0 + + if term == "SoSe": + if y2_str is not None: + raise ValueError( + "SoSe string should not contain '/' followed by a second year" + ) + return cls(year, "SoSe") + + # term == "WiSe" + if y2_str is not None: + next_year = int(y2_str.lstrip("0") or "0") + expected_next = (year + 1) % 100 + if next_year != expected_next: + raise ValueError("WiSe second year must equal first year + 1 (mod 100)") + # Accept both explicit "WiSe 6/7" and shorthand "WiSe 6" + return cls(year, "WiSe") + + +# ------------------------- quick self‑test ------------------------- +if __name__ == "__main__": + # Chain generation demo ------------------------------------------------ + s_start = Semester(6, "SoSe") # SoSe 6 + s_end = Semester(25, "WiSe") # WiSe 25/26 + chain = Semester.generate_missing(s_start, s_end) + # print("generate_missing:", [str(s) for s in chain]) + + # Parsing demo --------------------------------------------------------- + examples = [ + "SoSe 6", + "WiSe 6/7", + "WiSe 6", + "SoSe 23", + "WiSe 23/24", + "WiSe 24", + "WiSe 99/00", + "SoSe 00", + "WiSe 100/101", # test large year + ] + for ex in examples: + parsed = Semester.from_string(ex) + print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})") diff --git a/src/database/__init__.py b/src/database/__init__.py new file mode 100644 index 0000000..4e811a4 --- /dev/null +++ b/src/database/__init__.py @@ -0,0 +1,5 @@ +"""Database layer for data persistence.""" + +from .connection import Database + +__all__ = ["Database"] diff --git a/src/database/connection.py b/src/database/connection.py new file mode 100644 index 0000000..4111457 --- /dev/null +++ b/src/database/connection.py @@ -0,0 +1,2008 @@ +import datetime +import json +import os +import re +import sqlite3 as sql +import tempfile +from dataclasses import asdict +from pathlib import Path +from string import ascii_lowercase as lower +from string import digits, punctuation +from typing import Any, List, Optional, Tuple, Union + +from src import DATABASE_DIR, settings +from src.database.schemas import ( + CREATE_ELSA_FILES_TABLE, + CREATE_ELSA_MEDIA_TABLE, + CREATE_ELSA_TABLE, + CREATE_TABLE_APPARAT, + CREATE_TABLE_FILES, + CREATE_TABLE_MEDIA, + CREATE_TABLE_MESSAGES, + CREATE_TABLE_NEWEDITIONS, + CREATE_TABLE_PROF, + CREATE_TABLE_SUBJECTS, + CREATE_TABLE_USER, +) +from src.errors import AppPresentError, NoResultError +from src.core.models import ELSA, Apparat, ApparatData, BookData, Prof +from src.core.constants import SEMAP_MEDIA_ACCOUNTS +from src.core.semester import Semester +from src.shared.logging import log +from src.utils.blob import create_blob + +ascii_lowercase = lower + digits + punctuation + + +# get the line that called the function +class Database: + """ + Initialize the database and create the tables if they do not exist. + """ + + def __init__(self, db_path: Union[Path, None] = None): + """ + Default constructor for the database class + + Args: + db_path (str, optional): Optional Path for testing / specific purposes. Defaults to None. 
+ """ + if db_path is None: + if settings.database.path is not None: + self.db_path = Path( + settings.database.path.expanduser(), settings.database.name + ) + else: + self.db_path = None + + # self.db_path = self.db_path.replace("~", str(Path.home())) + else: + self.db_path = db_path + log.debug(f"Database path: {self.db_path}") + self.db_initialized = False + self.startup_check() + + def startup_check(self): + # check existence of all tables. if any is missing, recreate the table + if not self.db_initialized: + self.initializeDatabase() + tables = self.get_db_contents() + tables = [t[1] for t in tables] if tables is not None else [] + required_tables = [ + "semesterapparat", + "messages", + "media", + "files", + "prof", + "user", + "subjects", + "elsa", + "elsa_files", + "elsa_media", + "neweditions", + ] + + for table in required_tables: + if table not in tables: + log.critical(f"Table {table} is missing, recreating...") + self.create_table(table) + + def create_table(self, table_name: str): + match table_name: + case "semesterapparat": + query = CREATE_TABLE_APPARAT + case "messages": + query = CREATE_TABLE_MESSAGES + case "media": + query = CREATE_TABLE_MEDIA + case "files": + query = CREATE_TABLE_FILES + case "prof": + query = CREATE_TABLE_PROF + case "user": + query = CREATE_TABLE_USER + case "subjects": + query = CREATE_TABLE_SUBJECTS + case "elsa": + query = CREATE_ELSA_TABLE + case "elsa_files": + query = CREATE_ELSA_FILES_TABLE + case "elsa_media": + query = CREATE_ELSA_MEDIA_TABLE + case "neweditions": + query = CREATE_TABLE_NEWEDITIONS + case _: + log.error(f"Table {table_name} is not a valid table name") + self.query_db(query) + + def initializeDatabase(self): + if not self.db_initialized: + self.checkDatabaseStatus() + self.db_initialized = True + # run migrations after initial creation to bring schema up-to-date + try: + if self.db_path is not None: + self.run_migrations() + except Exception as e: + log.error(f"Error while running migrations: {e}") + + # --- Migration helpers integrated into Database --- + def _ensure_migrations_table(self, conn: sql.Connection) -> None: + cursor = conn.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS schema_migrations ( + id TEXT PRIMARY KEY, + applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + conn.commit() + + def _applied_migrations(self, conn: sql.Connection) -> List[str]: + cursor = conn.cursor() + cursor.execute("SELECT id FROM schema_migrations ORDER BY id") + rows = cursor.fetchall() + return [r[0] for r in rows] + + def _apply_sql_file(self, conn: sql.Connection, path: Path) -> None: + log.info(f"Applying migration {path.name}") + sql_text = path.read_text(encoding="utf-8") + cursor = conn.cursor() + cursor.executescript(sql_text) + cursor.execute( + "INSERT OR REPLACE INTO schema_migrations (id) VALUES (?)", (path.name,) + ) + conn.commit() + + def run_migrations(self) -> None: + """Apply unapplied .sql migrations from src/backend/migrations using this Database's connection.""" + migrations_dir = Path(__file__).parent / "migrations" + if not migrations_dir.exists(): + log.debug("Migrations directory does not exist, skipping migrations") + return + + conn = self.connect() + try: + self._ensure_migrations_table(conn) + applied = set(self._applied_migrations(conn)) + + migration_files = sorted( + [p for p in migrations_dir.iterdir() if p.suffix == ".sql"] + ) + for m in migration_files: + if m.name in applied: + log.debug(f"Skipping already applied migration {m.name}") + continue + 
+                self._apply_sql_file(conn, m)
+        finally:
+            conn.close()
+
+    # --- end migration helpers ---
+
+    def overwritePath(self, new_db_path: str):
+        log.debug("got new path, overwriting")
+        self.db_path = Path(new_db_path)
+
+    def checkDatabaseStatus(self):
+        path = settings.database.path
+        if path is None:
+            path = Path(DATABASE_DIR)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        if self.get_db_contents() == []:
+            log.critical("Database does not exist, creating tables")
+            log.critical(f"Path: {path}")
+            self.create_tables()
+            self.insertSubjects()
+
+    def getElsaMediaID(self, work_author: str, signature: str, pages: str):
+        query = (
+            "SELECT id FROM elsa_media WHERE work_author=? AND signature=? AND pages=?"
+        )
+        params = (work_author, signature, pages)
+        result = self.query_db(query, params, one=True)
+        if result is None:
+            raise NoResultError(
+                f"work_author: {work_author}, signature: {signature}, pages: {pages}"
+            )
+        return result[0]
+
+    def getElsaMediaType(self, id):
+        query = "SELECT type FROM elsa_media WHERE id=?"
+        return self.query_db(query, (id,), one=True)[0]
+
+    def get_db_contents(self) -> Union[List[Tuple[Any]], None]:
+        """
+        Get the table entries from sqlite_master.
+
+        Returns:
+            Union[List[Tuple], None]: one row per table, or None if the database cannot be opened
+        """
+        try:
+            with sql.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT * FROM sqlite_master WHERE type='table'")
+                return cursor.fetchall()
+        except sql.OperationalError:
+            return None
+
+    def connect(self) -> sql.Connection:
+        """
+        Connect to the database
+
+        Returns:
+            sql.Connection: The active connection to the database
+        """
+        conn = sql.connect(self.db_path)
+        # Fast pragmas suitable for a desktop app DB
+        conn.execute("PRAGMA journal_mode=WAL;")
+        conn.execute("PRAGMA synchronous=NORMAL;")
+        conn.execute("PRAGMA temp_store=MEMORY;")
+        conn.execute("PRAGMA mmap_size=134217728;")  # 128MB
+        return conn
+
+    def close_connection(self, conn: sql.Connection):
+        """
+        Close the connection to the database
+
+        Args:
+            conn (sql.Connection): the connection to be closed
+        """
+        conn.close()
+
+    def create_tables(self):
+        """
+        Create the tables in the database
+        """
+        # Bootstrapping of tables is handled via migrations. Run migrations instead
+        # of executing the hard-coded DDL here. Migrations are idempotent and
+        # contain the CREATE TABLE IF NOT EXISTS statements.
+        self.run_migrations()
+
+    def insertInto(self, query: str, params: Tuple) -> None:
+        """
+        Insert the sent data into the database
+
+        Args:
+            query (str): The query to be executed
+            params (Tuple): the parameters to be inserted into the database
+        """
+        conn = self.connect()
+        cursor = conn.cursor()
+        log.debug(f"Inserting into DB: {query}")
+        cursor.execute(query, params)
+        conn.commit()
+        self.close_connection(conn)
+
+    def getWebADISAuth(self) -> Tuple[str, str]:
+        """
+        Get the WebADIS authentication data from the database
+
+        Returns:
+            Tuple[str, str]: The username and password for WebADIS
+        """
+        result = self.query_db(
+            "SELECT username, password FROM webadis_login WHERE effective_range='SAP'",
+            one=True,
+        )
+        if result is None:
+            return ("", "")
+        return (result[0], result[1])
+
+    @log.catch
+    def query_db(
+        self,
+        query: str,
+        args: Tuple[Any, ...] = (),
+        one: bool = False,
+    ) -> Union[Tuple[Any, ...], List[Tuple[Any, ...]], None]:
+        """
+        Query the Database for the sent query.
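+
+        Usage sketch (table and column names taken from this schema; the
+        literal value is hypothetical)::
+
+            rows = db.query_db("SELECT id FROM prof WHERE fullname=?", ("Muster",))
+            first = db.query_db("SELECT id FROM prof", one=True)  # tuple or None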
+ + Args: + query (str): The query to be executed + args (Tuple, optional): The arguments for the query. Defaults to (). + one (bool, optional): Return the first result only. Defaults to False. + + Returns: + Union[Tuple | List[Tuple]]: Returns the result of the query + """ + conn = self.connect() + cursor = conn.cursor() + logs_query = query + + logs_args = args + # if "fileblob" in query: + # # set fileblob arg in logger to "too long" + # logs_query = query + # fileblob_location = query.find("fileblob") + # # remove fileblob from query + # logs_query = query[:fileblob_location] + "fileblob = too long" + + log_message = f"Querying database with query {logs_query}, args: {logs_args}" + # if "INSERT" in query: + # log_message = f"Querying database with query {query}" + if "INTO user" in query: + log_message = f"Querying database with query {query}" + # log.debug(f"DB Query: {log_message}") + log.debug(log_message) + try: + cursor.execute(query, args) + rv = cursor.fetchall() + conn.commit() + self.close_connection(conn) + except sql.OperationalError as e: + log.error(f"Error in query: {e}") + return None + return (rv[0] if rv else None) if one else rv + + # Books + def addBookToDatabase( + self, bookdata: BookData, app_id: Union[str, int], prof_id: Union[str, int] + ): + """ + Add books to the database. Both app_id and prof_id are required to add the book to the database, as the app_id and prof_id are used to select the books later on. + + Args: + bookdata (BookData): The metadata of the book to be added + app_id (str): The apparat id where the book should be added to + prof_id (str): The id of the professor where the book should be added to. + """ + log.info(f"Adding book {bookdata.signature} to database") + if app_id is None or prof_id is None: + raise ValueError("Apparate ID or Prof ID is None") + conn = self.connect() + cursor = conn.cursor() + t_query = ( + f"SELECT bookdata FROM media WHERE app_id={app_id} AND prof_id={prof_id}" + ) + log.debug(t_query) + # # log.debug(t_query) + result = cursor.execute(t_query).fetchall() + result = [BookData().from_string(i[0]) for i in result] + if bookdata in result: + # log.debug("Bookdata already in database") + # check if the book was deleted in the apparat + query = ( + "SELECT deleted FROM media WHERE app_id=? AND prof_id=? AND bookdata=?" + ) + params = (app_id, prof_id, json.dumps(asdict(bookdata), ensure_ascii=False)) + result = cursor.execute(query, params).fetchone() + if result[0] == 1: + # log.debug("Book was deleted, updating bookdata") + query = "UPDATE media SET deleted=0 WHERE app_id=? AND prof_id=? AND bookdata=?" + params = ( + app_id, + prof_id, + json.dumps(asdict(bookdata), ensure_ascii=False), + ) + cursor.execute(query, params) + conn.commit() + return + + query = ( + "INSERT INTO media (bookdata, app_id, prof_id,deleted) VALUES (?, ?, ?,?)" + ) + converted = json.dumps(asdict(bookdata), ensure_ascii=False) + params = (converted, app_id, prof_id, 0) + cursor.execute(query, params) + logMessage = f"Added book with signature {bookdata.signature} to database, data: {converted}" + log.info(logMessage) + conn.commit() + self.close_connection(conn) + + def getBookIdBasedOnSignature( + self, app_id: Union[str, int], prof_id: Union[str, int], signature: str + ) -> int: + """ + Get a book id based on the signature of the book. 
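+
+        Usage sketch (ids and signature are hypothetical)::
+
+            book_id = db.getBookIdBasedOnSignature(3, 7, "XB 1001")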
+ + Args: + app_id (str): The apparat id the book should be associated with + prof_id (str): The professor id the book should be associated with + signature (str): The signature of the book + + Returns: + int: The id of the book + """ + result = self.query_db( + "SELECT bookdata, id FROM media WHERE app_id=? AND prof_id=?", + (app_id, prof_id), + ) + books = [(BookData().from_string(i[0]), i[1]) for i in result] + book = [i for i in books if i[0].signature == signature][0][1] + return book + + def getBookBasedOnSignature( + self, app_id: Union[str, int], prof_id: Union[str, int], signature: str + ) -> BookData: + """ + Get the book based on the signature of the book. + + Args: + app_id (str): The apparat id the book should be associated with + prof_id (str): The professor id the book should be associated with + signature (str): The signature of the book + + Returns: + BookData: The total metadata of the book wrapped in a BookData object + """ + result = self.query_db( + "SELECT bookdata FROM media WHERE app_id=? AND prof_id=?", (app_id, prof_id) + ) + books: list[BookData] = [BookData().from_string(i[0]) for i in result] + book = [i for i in books if i.signature == signature][0] + return book + + def getLastBookId(self) -> int: + """ + Get the last book id in the database + + Returns: + int: ID of the last book in the database + """ + return self.query_db("SELECT id FROM media ORDER BY id DESC", one=True)[0] + + def searchBook( + self, data: dict[str, str] + ) -> Optional[list[tuple["BookData", int, int]]]: + """ + Search a book in the database using regex against signature/title. + + Args: + data: may contain: + - "signature": regex to match against BookData.signature + - "title": regex to match against BookData.title + + Returns: + list of (BookData, app_id, prof_id) tuples, or None if invalid args + """ + + # Determine mode (kept compatible with your original logic) + mode = 0 + if len(data) == 1 and "signature" in data: + mode = 1 + elif len(data) == 1 and "title" in data: + mode = 2 + elif len(data) == 2 and "signature" in data and "title" in data: + mode = 3 + else: + return None + + def _compile(expr: str) -> re.Pattern: + try: + return re.compile(expr, re.IGNORECASE | re.UNICODE) + except re.error: + # If user provided a broken regex, treat it as a literal + return re.compile(re.escape(expr), re.IGNORECASE | re.UNICODE) + + sig_re = _compile(data["signature"]) if mode in (1, 3) else None + title_re = _compile(data["title"]) if mode in (2, 3) else None + + # Fetch candidates once + rows = self.query_db("SELECT * FROM media WHERE deleted=0") + + results: list[tuple["BookData", int, int]] = [] + for row in rows: + bookdata = BookData().from_string( + row[1] + ) # assumes row[1] is the serialized bookdata + app_id = row[2] + prof_id = row[3] + + sig_val = bookdata.signature + title_val = bookdata.title + if mode == 1: + if sig_re.search(sig_val): + results.append((bookdata, app_id, prof_id)) + elif mode == 2: + if title_re.search(title_val): + results.append((bookdata, app_id, prof_id)) + else: # mode == 3 + if sig_re.search(sig_val) and title_re.search(title_val): + results.append((bookdata, app_id, prof_id)) + + return results + + def setAvailability(self, book_id: str, available: str): + """ + Set the availability of a book in the database + + Args: + book_id (str): The id of the book + available (str): The availability of the book + """ + self.query_db("UPDATE media SET available=? 
WHERE id=?", (available, book_id)) + + def getBookId( + self, bookdata: BookData, app_id: Union[str, int], prof_id: Union[str, int] + ) -> int: + """ + Get the id of a book based on the metadata of the book + + Args: + bookdata (BookData): The wrapped metadata of the book + app_id (str): The apparat id the book should be associated with + prof_id (str): The professor id the book should be associated with + + Returns: + int: ID of the book + """ + result = self.query_db( + "SELECT id FROM media WHERE bookdata=? AND app_id=? AND prof_id=?", + (bookdata.to_dict, app_id, prof_id), + one=True, + ) + return result[0] + + def getBook(self, book_id: int) -> BookData: + """ + Get the book based on the id in the database + + Args: + book_id (int): The id of the book + + Returns: + BookData: The metadata of the book wrapped in a BookData object + """ + return BookData().from_string( + self.query_db( + "SELECT bookdata FROM media WHERE id=?", (book_id,), one=True + )[0] + ) + + def getBooks( + self, app_id: Union[str, int], prof_id: Union[str, int], deleted: int = 0 + ) -> list[dict[str, Union[BookData, int]]]: + """ + Get the Books based on the apparat id and the professor id + + Args: + app_id (str): The ID of the apparat + prof_id (str): The ID of the professor + deleted (int, optional): The state of the book. Set to 1 to include deleted ones. Defaults to 0. + + Returns: + + list[dict[int, BookData, int]]: A list of dictionaries containing the id, the metadata of the book and the availability of the book + """ + qdata = self.query_db( + f"SELECT id,bookdata,available FROM media WHERE (app_id={app_id} AND prof_id={prof_id}) AND (deleted={deleted if deleted == 0 else '1 OR deleted=0'})" + ) + ret_result = [] + if qdata is None: + return [] + for result_a in qdata: + data: dict[str, Any] = {"id": int, "bookdata": BookData, "available": int} + data["id"] = result_a[0] + data["bookdata"] = BookData().from_string(result_a[1]) + data["available"] = result_a[2] + ret_result.append(data) + return ret_result + + def getAllBooks(self) -> list[dict[str, Union[int, BookData]]]: + """ + Get all books in the database that are not set as deleted + + Returns + ------- + list[dict[str, Union[int, BookData]]] + A list of dictionaries containing the id and the metadata of the book + """ + # return all books in the database + qdata = self.query_db("SELECT id,bookdata FROM media WHERE deleted=0") + ret_result: list[dict[str, Any]] = [] + if qdata is None: + return [] + for result_a in qdata: + data: dict[str, Any] = {"id": int, "bookdata": BookData} + data["id"] = result_a[0] + data["bookdata"] = BookData().from_string(result_a[1]) + + ret_result.append(data) + return ret_result + + def getApparatNrByBookId(self, book_id): + appNr = self.query_db( + "SELECT appnr FROM semesterapparat WHERE id IN (SELECT app_id FROM media WHERE id=?)", + (book_id,), + one=True, + ) + return appNr[0] if appNr else None + + def getBooksByProfId( + self, prof_id: int, deleted: int = 0 + ) -> list[dict[str, Union[int, BookData]]]: + """ + Get the Books based on the professor id + + Parameters + ---------- + prof_id : int + The ID of the professor + deleted : int, optional + If set to 1, it will include deleted books, by default 0 + + Returns + ------- + list[dict[str, Union[int, BookData]]] + A list of dictionaries containing the id, the metadata of the book and the availability of the book + """ + qdata = self.query_db( + f"SELECT id,bookdata,available FROM media WHERE prof_id={prof_id} AND (deleted={deleted if deleted == 0 else '1 OR 
deleted=0'})" + ) + ret_result = [] + if qdata is None: + return [] + for result_a in qdata: + data: dict[str, Any] = {"id": int, "bookdata": BookData, "available": int} + data["id"] = result_a[0] + data["bookdata"] = BookData().from_string(result_a[1]) + data["available"] = result_a[2] + ret_result.append(data) + return ret_result + + def updateBookdata(self, book_id: int, bookdata: BookData): + """ + Update the bookdata in the database + + Args: + book_id (str): The id of the book + bookdata (BookData): The new metadata of the book + """ + query = "UPDATE media SET bookdata= ? WHERE id=?" + book = bookdata.to_dict + self.query_db(query, (book, book_id)) + + def deleteBook(self, book_id: int): + """ + Delete a book from the database + + Args: + book_id (str): ID of the book + """ + self.query_db("UPDATE media SET deleted=1 WHERE id=?", (book_id,)) + + def deleteBooks(self, ids: list[int]): + """ + Delete multiple books from the database + + Args: + ids (list[int]): A list of book ids to be deleted + """ + query = f"UPDATE media SET deleted=1 WHERE id IN ({','.join(['?'] * len(ids))})" + self.query_db(query, tuple(ids)) + + # File Interactions + def getBlob(self, filename: str, app_id: Union[str, int]) -> bytes: + """ + Get a blob from the database + + Args: + filename (str): The name of the file + app_id (str): ID of the apparat + + Returns: + bytes: The file stored in + """ + return self.query_db( + "SELECT fileblob FROM files WHERE filename=? AND app_id=?", + (filename, app_id), + one=True, + )[0] + + def insertFile( + self, file: list[dict], app_id: Union[str, int], prof_id: Union[str, int] + ): + """Instert a list of files into the database + + Args: + file (list[dict]): a list containing all the files to be inserted + Structured: [{"name": "filename", "path": "path", "type": "filetype"}] + app_id (int): the id of the apparat + prof_id (str): the id of the professor + """ + for f in file: + filename = f["name"] + path = f["path"] + filetyp = f["type"] + if path == "Database": + continue + blob = create_blob(path) + query = "INSERT OR IGNORE INTO files (filename, fileblob, app_id, filetyp,prof_id) VALUES (?, ?, ?, ?,?)" + self.query_db(query, (filename, blob, app_id, filetyp, prof_id)) + + def recreateFile( + self, filename: str, app_id: Union[str, int], filetype: str + ) -> str: + """Recreate a file from the database + + Args: + filename (str): the name of the file + app_id (Union[str,int]): the id of the apparat + filetype (str): the extension of the file to be created + + Returns: + str: The filename of the recreated file + """ + blob = self.getBlob(filename, app_id) + log.debug(blob) + tempdir = settings.database.temp.expanduser() + if not tempdir.exists(): + tempdir.mkdir(parents=True, exist_ok=True) + file = tempfile.NamedTemporaryFile( + delete=False, dir=tempdir, mode="wb", suffix=f".{filetype}" + ) + file.write(blob) + # log.debug("file created") + return file.name + + def getFiles(self, app_id: Union[str, int], prof_id: int) -> list[tuple]: + """Get all the files associated with the apparat and the professor + + Args: + app_id (Union[str,int]): The id of the apparat + prof_id (Union[str,int]): the id of the professor + + Returns: + list[tuple]: a list of tuples containing the filename and the filetype for the corresponding apparat and professor + """ + return self.query_db( + "SELECT filename, filetyp FROM files WHERE app_id=? 
AND prof_id=?", + (app_id, prof_id), + ) + + def getSemesters(self) -> list[str]: + """Return all the unique semesters in the database + + Returns: + list: a list of strings containing the semesters + """ + data = self.query_db("SELECT DISTINCT erstellsemester FROM semesterapparat") + return [i[0] for i in data] + + def insertSubjects(self): + # log.debug("Inserting subjects") + subjects = [ + "Biologie", + "Chemie", + "Deutsch", + "Englisch", + "Erziehungswissenschaft", + "Französisch", + "Geographie", + "Geschichte", + "Gesundheitspädagogik", + "Haushalt / Textil", + "Kunst", + "Mathematik / Informatik", + "Medien in der Bildung", + "Musik", + "Philosophie", + "Physik", + "Politikwissenschaft", + "Prorektorat Lehre und Studium", + "Psychologie", + "Soziologie", + "Sport", + "Technik", + "Theologie", + "Wirtschaftslehre", + ] + conn = self.connect() + cursor = conn.cursor() + for subject in subjects: + cursor.execute("INSERT INTO subjects (name) VALUES (?)", (subject,)) + conn.commit() + self.close_connection(conn) + + def getSubjects(self): + """Get all the subjects in the database + + Returns: + list[tuple]: a list of tuples containing the subjects + """ + return self.query_db("SELECT * FROM subjects") + + # Messages + def addMessage( + self, messages: list[dict[str, Any]], user: str, app_id: Union[str, int] + ): + """add a Message to the database + + Args: + messages (list[dict[str, Any]]): the messages to be added + user (str): the user who added the messages + app_id (Union[str,int]): the id of the apparat + """ + + def __getUserId(user: str): + return self.query_db( + "SELECT id FROM user WHERE username=?", (user,), one=True + )[0] + + user_id = __getUserId(user) + for message in messages: + self.query_db( + "INSERT INTO messages (message, user_id, remind_at,appnr) VALUES (?,?,?,?)", + (message["message"], user_id, message["remind_at"], app_id), + ) + + def getAllMessages(self) -> list[dict[str, str, str, str]]: + """Get all the messages in the database + + Returns: + list[dict[str, str, str, str]]: a list of dictionaries containing the message, the user who added the message, the apparat id and the id of the message + """ + + def __get_user_name(user_id: int): + return self.query_db( + "SELECT username FROM user WHERE id=?", (user_id,), one=True + )[0] + + messages = self.query_db("SELECT * FROM messages") + ret = [ + { + "message": i[2], + "user": __get_user_name(i[4]), + "appnr": i[5], + "id": i[0], + "remind_at": i[3], + } + for i in messages + ] + return ret + + def getMessages(self, date: str) -> list[dict[str, str]]: + """Get all the messages for a specific date + + Args: + date (str): a date.datetime object formatted as a string in the format "YYYY-MM-DD" + + Returns: + list[dict[str, str]]: a list of dictionaries containing the message, the user who added the message, the apparat id and the id of the message + """ + + def __get_user_name(user_id: int): + return self.query_db( + "SELECT username FROM user WHERE id=?", (user_id,), one=True + )[0] + + messages = self.query_db("SELECT * FROM messages WHERE remind_at=?", (date,)) + ret = [ + {"message": i[2], "user": __get_user_name(i[4]), "appnr": i[5], "id": i[0]} + for i in messages + ] + return ret + + def deleteMessage(self, message_id: int): + """Delete a message from the database + + Args: + message_id (str): the id of the message + """ + log.debug(f"Deleting message with id {message_id}") + self.query_db("DELETE FROM messages WHERE id=?", (message_id,)) + + # Prof data + def getProfNameById(self, prof_id: Union[str, 
int], add_title: bool = False) -> str:
+        """Get a professor name based on the id
+
+        Args:
+            prof_id (Union[str,int]): The id of the professor
+            add_title (bool, optional): whether to prepend the title. Defaults to False.
+
+        Returns:
+            str: The name of the professor
+        """
+        prof = self.query_db(
+            "SELECT fullname FROM prof WHERE id=?", (prof_id,), one=True
+        )
+        if add_title:
+            return f"{self.getTitleById(prof_id)}{prof[0]}"
+        else:
+            return prof[0]
+
+    def getProfMailById(self, prof_id: Union[str, int]) -> str:
+        """Get the mail of a professor based on the id
+
+        Args:
+            prof_id (Union[str,int]): the id of the professor
+
+        Returns:
+            str: the mail of the professor
+        """
+        mail = self.query_db("SELECT mail FROM prof WHERE id=?", (prof_id,), one=True)[
+            0
+        ]
+        return mail if mail is not None else ""
+
+    def getTitleById(self, prof_id: Union[str, int]) -> str:
+        """Get the title of a professor based on the id
+
+        Args:
+            prof_id (Union[str,int]): the id of the professor
+
+        Returns:
+            str: the title of the professor with a trailing whitespace; if no title is present, an empty string is returned
+        """
+        title = self.query_db(
+            "SELECT titel FROM prof WHERE id=?", (prof_id,), one=True
+        )[0]
+        return f"{title} " if title is not None else ""
+
+    def getSpecificProfData(
+        self, prof_id: Union[str, int], fields: List[str]
+    ) -> tuple[Any, ...]:
+        """A customisable function to get specific data of a professor based on the id
+
+        Args:
+            prof_id (Union[str,int]): the id of the professor
+            fields (List[str]): a list of fields to be returned
+
+        Returns:
+            tuple: a tuple containing the requested data
+        """
+        query = f"SELECT {', '.join(fields)} FROM prof WHERE id=?"
+        # return the full row, not just the first requested field
+        return self.query_db(query, (prof_id,), one=True)
+
+    def getProfById(self, prof_id: Union[str, int]) -> Prof:
+        """Get a professor based on the id
+
+        Args:
+            prof_id (Union[str,int]): the id of the professor
+
+        Returns:
+            Prof: a Prof object containing the data of the professor
+        """
+        data = self.query_db("SELECT * FROM prof WHERE id=?", (prof_id,), one=True)
+        return Prof().from_tuple(data)
+
+    def getProfData(self, profname: str):
+        """Get mail, telephone number and title of a professor based on the name
+
+        Args:
+            profname (str): name of the professor
+
+        Returns:
+            Prof: a Prof object with the mail, telephone number and title of the professor
+        """
+        data = self.query_db(
+            "SELECT * FROM prof WHERE fullname=?",
+            (profname.replace(",", ""),),
+            one=True,
+        )
+        person = Prof()
+        return person.from_tuple(data)
+
+    def getProf(self, id) -> Prof:
+        """Get a professor based on the id
+
+        Args:
+            id (Union[str,int]): the id of the professor
+
+        Returns:
+            Prof: a Prof object containing the data of the professor
+        """
+        data = self.query_db("SELECT * FROM prof WHERE id=?", (id,), one=True)
+        return Prof().from_tuple(data)
+
+    def getProfs(self) -> list[Prof]:
+        """Return all the professors in the database
+
+        Returns:
+            list[Prof]: a list containing all the professors, one Prof object each
+        """
+        profs = self.query_db("SELECT * FROM prof")
+        return [Prof().from_tuple(prof) for prof in profs]
+
+    # Apparat
+    def getAllAparats(self, deleted: int = 0) -> list[Apparat]:
+        """Get all the apparats in the database
+
+        Args:
+            deleted (int, optional): set to 1 to return deleted apparats instead. Defaults to 0.
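+
+        Usage sketch::
+
+            active = db.getAllAparats()            # apparats still in use
+            removed = db.getAllAparats(deleted=1)  # deleted apparats instead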
+ + Returns: + list[tuple]: a list of tuples containing the apparats + """ + apparats = self.query_db( + "SELECT * FROM semesterapparat WHERE deletion_status=?", (deleted,) + ) + ret: list[Apparat] = [] + for apparat in apparats: + ret.append(Apparat().from_tuple(apparat)) + return ret + + def getApparatData(self, appnr, appname) -> ApparatData: + """Get the Apparat data based on the apparat number and the name + + Args: + appnr (str): the apparat number + appname (str): the name of the apparat + + Raises: + NoResultError: an error is raised if no result is found + + Returns: + ApparatData: the appended data of the apparat wrapped in an ApparatData object + """ + result = self.query_db( + "SELECT * FROM semesterapparat WHERE appnr=? AND name=?", + (appnr, appname), + one=True, + ) + if result is None: + raise NoResultError("No result found") + apparat = ApparatData() + apparat.apparat.id = result[0] + apparat.apparat.name = result[1] + apparat.apparat.appnr = result[4] + apparat.apparat.eternal = True if result[7] == 1 else False + apparat.prof = self.getProfData(self.getProfNameById(result[2])) + apparat.prof.fullname = self.getProfNameById(result[2]) + apparat.apparat.prof_id = result[2] + + apparat.apparat.subject = result[3] + apparat.apparat.created_semester = result[5] + apparat.apparat.extend_until = result[8] + apparat.apparat.deleted = result[9] + apparat.apparat.apparat_id_adis = result[11] + apparat.apparat.prof_id_adis = result[12] + apparat.apparat.konto = result[13] + return apparat + + def getUnavailableApparatNumbers(self) -> List[int]: + """Get a list of all the apparat numbers in the database that are currently in use + + Returns: + List[int]: the list of used apparat numbers + """ + numbers = self.query_db( + "SELECT appnr FROM semesterapparat WHERE deletion_status=0" + ) + numbers = [i[0] for i in numbers] + numbers.sort() + log.info(f"Currently used apparat numbers: {numbers}") + return numbers + + def setNewSemesterDate(self, app_id: Union[str, int], newDate, dauerapp=False): + """Set the new semester date for an apparat + + Args: + app_id (Union[str,int]): the id of the apparat + newDate (str): the new date + dauerapp (bool, optional): if the apparat was changed to dauerapparat. Defaults to False. + """ + # today as yyyy-mm-dd + today = datetime.datetime.now().strftime("%Y-%m-%d") + + if dauerapp: + self.query_db( + "UPDATE semesterapparat SET verlängerung_bis=?, dauer=?, verlängert_am=? WHERE appnr=?", + (newDate, dauerapp, today, app_id), + ) + else: + self.query_db( + "UPDATE semesterapparat SET verlängerung_bis=?, verlängert_am=? WHERE appnr=?", + (newDate, today, app_id), + ) + + def getId(self, apparat_name) -> Optional[int]: + """get the id of an apparat based on the name + + Args: + apparat_name (str): the name of the apparat e.g. "Semesterapparat 1" + + Returns: + Optional[int]: the id of the apparat, if the apparat is not found, None is returned + """ + data = self.query_db( + "SELECT id FROM semesterapparat WHERE name=?", (apparat_name,), one=True + ) + if data is None: + return None + else: + return data[0] + + def getApparatId(self, apparat_name) -> Optional[int]: + """get the id of an apparat based on the name + + Args: + apparat_name (str): the name of the apparat e.g. 
"Semesterapparat 1" + + Returns: + Optional[int]: the id of the apparat, if the apparat is not found, None is returned + """ + data = self.query_db( + "SELECT appnr FROM semesterapparat WHERE name=?", (apparat_name,), one=True + ) + if data is None: + return None + else: + return data[0] + + def createApparat(self, apparat: ApparatData) -> int: + """create the apparat in the database + + Args: + apparat (ApparatData): the wrapped metadata of the apparat + + Raises: + AppPresentError: an error describing that the apparats chosen id is already present in the database + + Returns: + Optional[int]: the id of the apparat + """ + log.debug(apparat) + app = apparat.apparat + prof = apparat.prof + present_prof = self.getProfByName(prof.name()) + prof_id = present_prof.id + log.debug(present_prof) + + app_id = self.getApparatId(app.name) + if app_id: + return AppPresentError(app_id) + if not prof_id: + log.debug("prof id not present, creating prof with data", prof) + prof_id = self.createProf(prof) + log.debug(prof_id) + query = f"INSERT OR IGNORE INTO semesterapparat (appnr, name, erstellsemester, dauer, prof_id, fach,deletion_status,konto) VALUES ('{app.appnr}', '{app.name}', '{app.created_semester}', '{app.eternal}', {prof_id}, '{app.subject}', '{0}', '{SEMAP_MEDIA_ACCOUNTS[app.appnr]}')" + log.debug(query) + self.query_db(query) + return None + + def getApparatsByProf(self, prof_id: Union[str, int]) -> list[Apparat]: + """Get all apparats based on the professor id + + Args: + prof_id (Union[str,int]): the id of the professor + + Returns: + list[tuple]: a list of tuples containing the apparats + """ + data = self.query_db( + "SELECT * FROM semesterapparat WHERE prof_id=?", (prof_id,) + ) + ret = [] + for i in data: + log.debug(i) + ret.append(Apparat().from_tuple(i)) + return ret + + def getApparatsBySemester(self, semester: str) -> dict[list]: + """get all apparats based on the semester + + Args: + semester (str): the selected semester + + Returns: + dict[list]: a list off all created and deleted apparats for the selected semester + """ + data = self.query_db( + "SELECT name, prof_id FROM semesterapparat WHERE erstellsemester=?", + (semester,), + ) + conn = self.connect() + cursor = conn.cursor() + c_tmp = [] + for i in data: + c_tmp.append((i[0], self.getProfNameById(i[1]))) + query = ( + f"SELECT name,prof_id FROM semesterapparat WHERE deleted_date='{semester}'" + ) + result = cursor.execute(query).fetchall() + d_tmp = [] + for i in result: + d_tmp.append((i[0], self.getProfNameById(i[1]))) + # group the apparats by prof + c_ret = {} + for i in c_tmp: + if i[1] not in c_ret.keys(): + c_ret[i[1]] = [i[0]] + else: + c_ret[i[1]].append(i[0]) + d_ret = {} + for i in d_tmp: + if i[1] not in d_ret.keys(): + d_ret[i[1]] = [i[0]] + else: + d_ret[i[1]].append(i[0]) + self.close_connection(conn) + return {"created": c_ret, "deleted": d_ret} + + def getApparatCountBySemester(self) -> tuple[list[str], list[int]]: + """get a list of all apparats created and deleted by semester + + Returns: + tuple[list[str],list[int]]: a tuple containing two lists, the first list contains the semesters, the second list contains the amount of apparats created and deleted for the corresponding semester + """ + conn = self.connect() + cursor = conn.cursor() + semesters = self.getSemesters() + created = [] + deleted = [] + for semester in semesters: + query = f"SELECT COUNT(*) FROM semesterapparat WHERE erstellsemester='{semester}'" + result = cursor.execute(query).fetchone() + created.append(result[0]) + query = f"SELECT 
COUNT(*) FROM semesterapparat WHERE deletion_status=1 AND deleted_date='{semester}'" + result = cursor.execute(query).fetchone() + deleted.append(result[0]) + # store data in a tuple + ret = [] + for sem in semesters: + e_tuple = ( + sem, + created[semesters.index(sem)], + deleted[semesters.index(sem)], + ) + ret.append(e_tuple) + self.close_connection(conn) + return ret + + def deleteApparat(self, apparat: Apparat, semester: str): + """Delete an apparat from the database + + Args: + apparat: (Apparat): the apparat to be deleted + semester (str): the semester the apparat should be deleted from + """ + apparat_nr = apparat.appnr + app_id = self.getId(apparat.name) + self.query_db( + "UPDATE semesterapparat SET deletion_status=1, deleted_date=? WHERE appnr=? AND name=?", + (semester, apparat_nr, apparat.name), + ) + # delete all books associated with the app_id + # print(apparat_nr, app_id) + self.query_db("UPDATE media SET deleted=1 WHERE app_id=?", (app_id,)) + + def isEternal(self, id): + """check if the apparat is eternal (dauerapparat) + + Args: + id (int): the id of the apparat to be checked + + Returns: + int: the state of the apparat + """ + return self.query_db( + "SELECT dauer FROM semesterapparat WHERE appnr=?", (id,), one=True + ) + + def getApparatName(self, app_id: Union[str, int], prof_id: Union[str, int]): + """get the name of the apparat based on the id + + Args: + app_id (Union[str,int]): the id of the apparat + prof_id (Union[str,int]): the id of the professor + + Returns: + str: the name of the apparat + """ + result = self.query_db( + "SELECT name FROM semesterapparat WHERE appnr=? AND prof_id=?", + (app_id, prof_id), + one=True, + ) + if result: + return result[0] + return None + + def updateApparat(self, apparat_data: ApparatData): + """Update an apparat in the database + + Args: + apparat_data (ApparatData): the new metadata of the apparat + """ + query = "UPDATE semesterapparat SET name = ?, fach = ?, dauer = ?, prof_id = ?, prof_id_adis = ?, apparat_id_adis = ? WHERE appnr = ?" + params = ( + apparat_data.apparat.name, + apparat_data.apparat.subject, + apparat_data.apparat.eternal, + self.getProfData(apparat_data.prof.fullname).id, + apparat_data.apparat.prof_id_adis, + apparat_data.apparat.apparat_id_adis, + apparat_data.apparat.appnr, + ) + log.debug(f"Updating apparat with query {query} and params {params}") + self.query_db(query, params) + + def checkApparatExists(self, app_name: str): + """check if the apparat is already present in the database based on the name + + Args: + apparat_name (str): the name of the apparat + + Returns: + bool: True if the apparat is present, False if not + """ + return ( + True + if self.query_db( + "SELECT appnr FROM semesterapparat WHERE name=?", + (app_name,), + one=True, + ) + else False + ) + + def checkApparatExistsByNr(self, app_nr: Union[str, int]) -> bool: + """a check to see if the apparat is already present in the database, based on the nr. This query will exclude deleted apparats + + Args: + app_nr (Union[str, int]): the id of the apparat + + Returns: + bool: True if the apparat is present, False if not + """ + return ( + True + if self.query_db( + "SELECT id FROM semesterapparat WHERE appnr=? 
and deletion_status=0", + (app_nr,), + one=True, + ) + else False + ) + + # Statistics + def statistic_request(self, **kwargs: Any): + """Take n amount of kwargs and return the result of the query""" + + def __query(query): + """execute the query and return the result + + Args: + query (str): the constructed query + + Returns: + list: the result of the query + """ + log.debug(f"Query: {query}") + conn = self.connect() + cursor = conn.cursor() + result = cursor.execute(query).fetchall() + for result_a in result: + orig_value = result_a + prof_name = self.getProfNameById(result_a[2]) + # replace the prof_id with the prof_name + result_a = list(result_a) + result_a[2] = prof_name + result_a = tuple(result_a) + result[result.index(orig_value)] = result_a + self.close_connection(conn) + log.debug(f"Query result: {result}") + return result + + if "deletable" in kwargs.keys(): + query = f"""SELECT * FROM semesterapparat + WHERE deletion_status=0 AND dauer=0 AND + ( + (erstellsemester!='{kwargs["deletesemester"]}' AND verlängerung_bis IS NULL) OR + (erstellsemester!='{kwargs["deletesemester"]}' AND verlängerung_bis!='{kwargs["deletesemester"]}' AND verlängerung_bis!='{Semester().next}') + )""" + return __query(query) + if "dauer" in kwargs.keys(): + kwargs["dauer"] = kwargs["dauer"].replace("Ja", "1").replace("Nein", "0") + query = "SELECT * FROM semesterapparat WHERE " + for key, value in kwargs.items() if kwargs.items() is not None else {}: + # log.debug(key, value) + query += f"{key}='{value}' AND " + # log.debug(query) + # remove deletesemester part from normal query, as this will be added to the database upon deleting the apparat + if "deletesemester" in kwargs.keys(): + query = query.replace( + f"deletesemester='{kwargs['deletesemester']}' AND ", "" + ) + if "endsemester" in kwargs.keys(): + if "erstellsemester" in kwargs.keys(): + query = query.replace(f"endsemester='{kwargs['endsemester']}' AND ", "") + query = query.replace( + f"erstellsemester='{kwargs['erstellsemester']} AND ", "xyz" + ) + else: + query = query.replace( + f"endsemester='{kwargs['endsemester']}' AND ", "xyz" + ) + # log.debug("replaced") + query = query.replace( + "xyz", + f"(erstellsemester='{kwargs['endsemester']}' OR verlängerung_bis='{kwargs['endsemester']}') AND ", + ) + # remove all x="" parts from the query where x is a key in kwargs + log.info(f"Query before: {query}") + query = query.strip() + query = query[:-4] + log.info(f"Query after: {query}") + # check if query ends with lowercase letter or a '. if not, remove last symbol and try again + while query[-1] not in ascii_lowercase and query[-1] != "'": + query = query[:-1] + query = query.strip() + + # log.debug(query) + res = __query(query) + # log.debug(res) + return res + + # Admin data + def getUser(self): + """Get a single user from the database""" + return self.query_db("SELECT * FROM user", one=True) + + def getUsers(self) -> list[tuple]: + """Return a list of tuples of all the users in the database""" + return self.query_db("SELECT * FROM user") + + def login(self, user, hashed_password): + """try to login the user. + The salt for the user will be requested from the database and then added to the hashed password. 
The password will then be compared to the password in the database
+
+        Args:
+            user (str): username that tries to login
+            hashed_password (str): the password the user tries to login with
+
+        Returns:
+            bool: True if the login was successful, False if not
+        """
+        try:
+            salt = self.query_db(
+                "SELECT salt FROM user WHERE username=?", (user,), one=True
+            )[0]
+            if salt is None:
+                return False
+        except TypeError:
+            return False
+        hashed_password = salt + hashed_password
+        password = self.query_db(
+            "SELECT password FROM user WHERE username=?", (user,), one=True
+        )[0]
+        return password == hashed_password
+
+    def changePassword(self, user, new_password):
+        """change the password of a user.
+        The salt is prepended to the hashed password before the result is committed to the database
+
+        Args:
+            user (str): username
+            new_password (str): the hashed password
+        """
+        salt = self.query_db(
+            "SELECT salt FROM user WHERE username=?", (user,), one=True
+        )[0]
+        new_password = salt + new_password
+        self.query_db(
+            "UPDATE user SET password=? WHERE username=?", (new_password, user)
+        )
+
+    def getRole(self, user: str) -> str:
+        """get the role of the user
+
+        Args:
+            user (str): username
+
+        Returns:
+            str: the name of the role
+        """
+        return self.query_db(
+            "SELECT role FROM user WHERE username=?", (user,), one=True
+        )[0]
+
+    def getRoles(self) -> list[str]:
+        """get all the roles in the database
+
+        Returns:
+            list[str]: a list of all the roles
+        """
+        roles = self.query_db("SELECT role FROM user")
+        return [i[0] for i in roles]
+
+    def checkUsername(self, user) -> bool:
+        """a check to see if the username is already present in the database
+
+        Args:
+            user (str): the username
+
+        Returns:
+            bool: True if the username is present, False if not
+        """
+        data = self.query_db(
+            "SELECT username FROM user WHERE username=?", (user,), one=True
+        )
+        return data is not None
+
+    def createUser(self, user, password, role, salt) -> bool:
+        """create a user from the AdminCommands class.
+
+        Args:
+            user (str): the username of the user
+            password (str): a hashed password
+            role (str): the role of the user
+            salt (str): a salt for the password
+
+        Returns:
+            bool: True if the user was created, False if not
+        """
+        self.query_db(
+            "INSERT OR IGNORE INTO user (username, password, role, salt) VALUES (?,?,?,?)",
+            (user, password, role, salt),
+        )
+        # check if the user was created
+        return (
+            self.query_db(
+                "SELECT username FROM user WHERE username=?", (user,), one=True
+            )
+            is not None
+        )
+
+    def deleteUser(self, user):
+        """delete a user
+
+        Args:
+            user (str): username of the user
+        """
+        self.query_db("DELETE FROM user WHERE username=?", (user,))
+
+    def updateUser(self, username, data: dict[str, str]):
+        """change the data of a user
+
+        Args:
+            username (str): the username of the user
+            data (dict[str, str]): the data to be changed
+        """
+        conn = self.connect()
+        cursor = conn.cursor()
+        query = "UPDATE user SET "
+        for key, value in data.items():
+            if key == "username":
+                continue
+            query += f"{key}='{value}',"
+        query = query[:-1]
+        query += " WHERE username=?"
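+        # Review note: the SET clause interpolates column names and values from
+        # `data` directly (sqlite3 cannot bind identifiers), so the keys must be
+        # trusted, application-defined field names; binding the values through
+        # "?" placeholders would be the safer pattern here.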
+ params = (username,) + cursor.execute(query, params) + conn.commit() + self.close_connection(conn) + + def getFacultyMember(self, name: str) -> tuple: + """get a faculty member based on the name + + Args: + name (str): the name to be searched for + + Returns: + tuple: a tuple containing the data of the faculty member + """ + return self.query_db( + "SELECT titel, fname,lname,mail,telnr,fullname FROM prof WHERE fullname=?", + (name,), + one=True, + ) + + def updateFacultyMember(self, data: dict, oldlname: str, oldfname: str): + """update the data of a faculty member + + Args: + data (dict): a dictionary containing the data to be updated + oldlname (str): the old last name of the faculty member + oldfname (str): the old first name of the faculty member + """ + placeholders = ", ".join([f"{i}=:{i} " for i in data.keys()]) + query = f"UPDATE prof SET {placeholders} WHERE lname = :oldlname AND fname = :oldfname" + data["oldlname"] = oldlname + data["oldfname"] = oldfname + self.query_db(query, data) + + def getFacultyMembers(self): + """get a list of all faculty members + + Returns: + list[tuple]: a list of tuples containing the faculty members + """ + return self.query_db("SELECT titel, fname,lname,mail,telnr,fullname FROM prof") + + def restoreApparat(self, app_id: Union[str, int], app_name: str): + """restore an apparat from the database + + Args: + app_id (Union[str, int]): the id of the apparat + """ + return self.query_db( + "UPDATE semesterapparat SET deletion_status=0, deleted_date=NULL WHERE appnr=? and name=?", + (app_id, app_name), + ) + + # ELSA + + def createElsaApparat(self, date, prof_id, semester) -> int: + """create a new apparat in the database for the ELSA system + + Args: + date (str): the name of the apparat + prof_id (int): the id of the professor + semester (str): the semester the apparat is created in + + Returns: + int: the id of the apparat + """ + self.query_db( + "INSERT OR IGNORE INTO elsa (date, prof_id, semester) VALUES (?,?,?)", + (date, prof_id, semester), + ) + # get the id of the apparat + apparat_id = self.query_db( + "SELECT id FROM elsa WHERE date=? AND prof_id=? AND semester=?", + (date, prof_id, semester), + one=True, + )[0] + return apparat_id + + def updateElsaApparat(self, date, prof_id, semester, elsa_id): + """update an ELSA apparat in the database + + Args: + date (str): the name of the apparat + prof_id (int): the id of the professor + semester (str): the semester the apparat is created in + elsa_id (int): the id of the ELSA apparat + """ + self.query_db( + "UPDATE elsa SET date=?, prof_id=?, semester=? WHERE id=?", + (date, prof_id, semester, elsa_id), + ) + + def addElsaMedia(self, data: dict, elsa_id: int): + """add a media to the ELSA system + + Args: + data (dict): a dictionary containing the data of the media, + elsa_id (int): the id of the ELSA apparat + """ + headers = [] + entries = [] + for key, value in data.items(): + headers.append(key) + entries.append(value) + headers.append("elsa_id") + entries.append(elsa_id) + query = f"INSERT INTO elsa_media ({', '.join(headers)}) VALUES ({', '.join(['?' 
for i in range(len(headers))])})"
+        self.query_db(query, entries)
+
+    def getElsaMedia(self, elsa_id: int):
+        """get all the media of an ELSA apparat
+
+        Args:
+            elsa_id (int): the id of the ELSA apparat
+
+        Returns:
+            list[dict]: a list of dictionaries, one per media entry
+        """
+        media = self.query_db("SELECT * FROM elsa_media WHERE elsa_id=?", (elsa_id,))
+        # convert the media rows to a list of dictionaries keyed by column name
+        ret = []
+        table_fields = self.query_db("PRAGMA table_info(elsa_media)")
+        for m in media:
+            tmp = {}
+            for i in range(len(m)):
+                tmp[table_fields[i][1]] = m[i]
+            ret.append(tmp)
+        return ret
+
+    def insertElsaFile(self, file: list[dict], elsa_id: int):
+        """Insert a list of files into the ELSA system
+
+        Args:
+            file (list[dict]): a list containing all the files to be inserted
+                Structured: [{"name": "filename", "path": "path", "type": "filetype"}]
+            elsa_id (int): the id of the ELSA apparat
+        """
+        for f in file:
+            filename = f["name"]
+            path = f["path"]
+            filetyp = f["type"]
+            if path == "Database":
+                continue
+            blob = create_blob(path)
+            query = "INSERT OR IGNORE INTO elsa_files (filename, fileblob, elsa_id, filetyp) VALUES (?, ?, ?, ?)"
+            self.query_db(query, (filename, blob, elsa_id, filetyp))
+
+    def recreateElsaFile(self, filename: str, filetype: str) -> str:
+        """Recreate a file from the ELSA system
+
+        Args:
+            filename (str): the name of the file
+            filetype (str): the extension of the file to be created
+
+        Returns:
+            str: The filename of the recreated file
+        """
+        blob = self.query_db(
+            "SELECT fileblob FROM elsa_files WHERE filename=?", (filename,), one=True
+        )[0]
+        tempdir = settings.database.temp.expanduser()
+        if not tempdir.exists():
+            tempdir.mkdir(parents=True, exist_ok=True)
+
+        file = tempfile.NamedTemporaryFile(
+            delete=False, dir=tempdir, mode="wb", suffix=f".{filetype}"
+        )
+        file.write(blob)
+        return file.name
+
+    def getElsaApparats(self) -> ELSA:
+        """Get all the ELSA apparats in the database, ordered by date
+
+        Returns:
+            list[tuple]: a list of tuples containing the ELSA apparats
+        """
+        return self.query_db(
+            "SELECT * FROM elsa ORDER BY substr(date, 7, 4) || '-' || substr(date, 4, 2) || '-' || substr(date, 1, 2)"
+        )
+
+    def getElsaId(self, prof_id: int, semester: str, date: str) -> Optional[int]:
+        """get the id of an ELSA apparat based on the professor, semester and date
+
+        Args:
+            prof_id (int): the id of the professor
+            semester (str): the semester
+            date (str): the date of the apparat
+
+        Returns:
+            Optional[int]: the id of the ELSA apparat, or None if not found
+        """
+
+        data = self.query_db(
+            "SELECT id FROM elsa WHERE prof_id=? AND semester=? 
AND date=?", + (prof_id, semester, date), + one=True, + ) + if data is None: + return None + return data[0] + + def getElsaFiles(self, elsa_id: int): + """get all the files of an ELSA apparat + + Args: + elsa_id (int): the id of the ELSA apparat + + Returns: + list[tuple]: a list of tuples containing the files + """ + return self.query_db( + "SELECT filename, filetyp FROM elsa_files WHERE elsa_id=?", (elsa_id,) + ) + + ### + + def createProf(self, profdata: Prof): + log.debug(profdata) + conn = self.connect() + cursor = conn.cursor() + fname = profdata.firstname + lname = profdata.lastname + fullname = f"{lname} {fname}" + mail = profdata.mail + telnr = profdata.telnr + title = profdata.title + + query = "INSERT INTO prof (fname, lname, fullname, mail, telnr, titel) VALUES (?,?,?,?,?,?)" + log.debug(query) + cursor.execute(query, (fname, lname, fullname, mail, telnr, title)) + + conn.commit() + conn.close() + return self.getProfId(profdata) + + def getElsaProfId(self, profname): + query = f"SELECT id FROM elsa_prof WHERE fullname = '{profname}'" + data = self.query_db(query) + if data: + return data[0][0] + else: + return None + + def getElsaProfs(self) -> list[str]: + query = "SELECT fullname FROM elsa_prof" + data = self.query_db(query) + if data: + return [i[0] for i in data] + else: + return [] + + def getProfId(self, profdata: dict[str, Any] | Prof): + """Get the prof ID based on the profdata + + Args: + profdata (dict | Prof): either a dictionary containing the prof data or a Prof object + + Returns: + int | None: The id of the prof or None if not found + """ + conn = self.connect() + cursor = conn.cursor() + if isinstance(profdata, dict): + name = profdata["profname"] + if "," in name: + fname = name.split(", ")[1].strip() + lname = name.split(", ")[0].strip() + fullname = f"{lname} {fname}" + else: + fullname = profdata["profname"] + else: + fullname = profdata.name() + query = "SELECT id FROM prof WHERE fullname = ?" + log.debug(query) + + cursor.execute(query, (fullname,)) + result = cursor.fetchone() + if result: + return result[0] + else: + return None + + def getProfByName(self, fullname): + """Get all Data of the prof based on fullname + + Args: + fullname (str): The full name of the prof + """ + conn = self.connect() + cursor = conn.cursor() + query = "SELECT * FROM prof WHERE fullname = ?" + log.debug(query) + + result = cursor.execute(query, (fullname,)).fetchone() + if result: + return Prof().from_tuple(result) + else: + return Prof() + + def getProfIDByApparat(self, apprarat_id: int) -> Optional[int]: + """Get the prof id based on the semesterapparat id from the database + + Args: + apprarat_id (int): Number of the apparat + + Returns: + + int | None: The id of the prof or None if not found + """ + query = "SELECT prof_id from semesterapparat WHERE appnr = ? and deletion_status = 0" + data = self.query_db(query, (apprarat_id,)) + if data: + log.info("Prof ID: " + str(data[0][0])) + return data[0][0] + else: + return None + + def copyBookToApparat(self, book_id: int, apparat: int): + # get book data + new_apparat_id = apparat + new_prof_id = self.getProfIDByApparat(new_apparat_id) + query = ( + "INSERT INTO media (bookdata, app_id, prof_id, deleted, available, reservation) " + "SELECT bookdata, ?, ?, 0, available, reservation FROM media WHERE id = ?" 
+        )
+        connection = self.connect()
+        cursor = connection.cursor()
+        cursor.execute(query, (new_apparat_id, new_prof_id, book_id))
+        connection.commit()
+        connection.close()
+
+    def moveBookToApparat(self, book_id: int, apparat: int):
+        """Move the book to the new apparat
+
+        Args:
+            book_id (int): the ID of the book
+            apparat (int): the ID of the new apparat
+        """
+        query = "UPDATE media SET app_id = ? WHERE id = ?"
+        connection = self.connect()
+        cursor = connection.cursor()
+        cursor.execute(query, (apparat, book_id))
+        connection.commit()
+        connection.close()
+
+    def getApparatNameByAppNr(self, appnr: int):
+        query = (
+            "SELECT name FROM semesterapparat WHERE appnr = ? and deletion_status = 0"
+        )
+        data = self.query_db(query, (appnr,))
+        if data:
+            return data[0][0]
+        return None
+
+    def fetch_one(self, query: str, args: tuple[Any, ...] = ()) -> tuple[Any, ...]:
+        connection = self.connect()
+        cursor = connection.cursor()
+        cursor.execute(query, args)
+        result = cursor.fetchone()
+        connection.close()
+        return result
+
+    def getBookIdByPPN(self, ppn: str) -> Optional[int]:
+        query = "SELECT id FROM media WHERE bookdata LIKE ?"
+        data = self.query_db(query, (f"%{ppn}%",))
+        if data:
+            return data[0][0]
+        return None
+
+    def getNewEditionsByApparat(self, apparat_id: int) -> list[BookData]:
+        """Get all new editions for a specific apparat
+
+        Args:
+            apparat_id (int): the id of the apparat
+
+        Returns:
+            list[BookData]: the not-yet-ordered new editions for the apparat
+        """
+        query = "SELECT * FROM neweditions WHERE for_apparat=? AND ordered=0"
+        results = self.query_db(query, (apparat_id,))
+        res = []
+        for result in results:
+            # keep only the new edition payload; the old edition can be reconstructed if needed
+            res.append(BookData().from_string(result[1]))
+        return res
+
+    def setOrdered(self, newBook_id: int):
+        query = "UPDATE neweditions SET ordered=1 WHERE id=?"
+        self.query_db(query, (newBook_id,))
+
+    def getBooksWithNewEditions(self, app_id) -> list[tuple[BookData, BookData]]:
+        # select all bookdata from media, based on the old_edition_id in neweditions
+        # where for_apparat = app_id; also get the new_edition bookdata
+        query = "SELECT m.bookdata, new_bookdata FROM media m JOIN neweditions n ON m.id = n.old_edition_id WHERE n.for_apparat = ?"
+        results = self.query_db(query, (app_id,))
+        # store results as (old, new) tuples
+        res = []
+        for result in results:
+            oldedition = BookData().from_string(result[0])
+            newedition = BookData().from_string(result[1])
+            res.append((oldedition, newedition))
+        return res
+
+    def getNewEditionId(self, newBook: BookData):
+        query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
+        args = newBook.isbn[0] if newBook.isbn else newBook.ppn
+        params = (f"%{args}%",)
+        data = self.query_db(query, params, one=True)
+        if data:
+            return data[0]
+        return None
+
+    def insertNewEdition(self, newBook: BookData, oldBookId: int, for_apparat: int):
+        # check if the new edition is already in the table, based on newBook.ppn
+        check_query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
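+        # The duplicate check matches the new book's PPN anywhere inside the
+        # serialized bookdata payload; a LIKE over the JSON blob is coarse, but
+        # it avoids a schema change for a dedicated ppn column.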
+ check_params = (f"%{newBook.ppn}%",) + data = self.query_db(check_query, check_params, one=True) + if data: + log.info("New edition already in table, skipping insert") + return + + query = "INSERT INTO neweditions (new_bookdata, old_edition_id, for_apparat) VALUES (?,?,?)" + params = (newBook.to_dict, oldBookId, for_apparat) + + self.query_db(query, params) diff --git a/src/database/migrations/V001__create_base_tables.sql b/src/database/migrations/V001__create_base_tables.sql new file mode 100644 index 0000000..4848add --- /dev/null +++ b/src/database/migrations/V001__create_base_tables.sql @@ -0,0 +1,132 @@ +BEGIN TRANSACTION; + +CREATE TABLE IF NOT EXISTS semesterapparat ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT, + prof_id INTEGER, + fach TEXT, + appnr INTEGER, + erstellsemester TEXT, + verlängert_am TEXT, + dauer BOOLEAN, + verlängerung_bis TEXT, + deletion_status INTEGER, + deleted_date TEXT, + apparat_id_adis INTEGER, + prof_id_adis INTEGER, + konto INTEGER, + FOREIGN KEY (prof_id) REFERENCES prof (id) + ); + +CREATE TABLE IF NOT EXISTS media ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + bookdata TEXT, + app_id INTEGER, + prof_id INTEGER, + deleted INTEGER DEFAULT (0), + available BOOLEAN, + reservation BOOLEAN, + FOREIGN KEY (prof_id) REFERENCES prof (id), + FOREIGN KEY (app_id) REFERENCES semesterapparat (id) + ); + +CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY, + filename TEXT, + fileblob BLOB, + app_id INTEGER, + filetyp TEXT, + prof_id INTEGER REFERENCES prof (id), + FOREIGN KEY (app_id) REFERENCES semesterapparat (id) + ); + +CREATE TABLE IF NOT EXISTS messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP, + message TEXT NOT NULL, + remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP, + user_id INTEGER NOT NULL, + appnr INTEGER, + FOREIGN KEY (user_id) REFERENCES user (id) + ); + +CREATE TABLE IF NOT EXISTS prof ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + titel TEXT, + fname TEXT, + lname TEXT, + fullname TEXT NOT NULL UNIQUE, + mail TEXT, + telnr TEXT + ); + +CREATE TABLE IF NOT EXISTS user ( + id integer NOT NULL PRIMARY KEY AUTOINCREMENT, + created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + username TEXT NOT NULL UNIQUE, + password TEXT NOT NULL, + salt TEXT NOT NULL, + role TEXT NOT NULL, + email TEXT UNIQUE, + name TEXT + ); + +CREATE TABLE IF NOT EXISTS subjects ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL UNIQUE +); + +CREATE TABLE IF NOT EXISTS elsa ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + date TEXT NOT NULL, + semester TEXT NOT NULL, + prof_id INTEGER NOT NULL + ); + +CREATE TABLE IF NOT EXISTS elsa_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + filename TEXT NOT NULL, + fileblob BLOB NOT NULL, + elsa_id INTEGER NOT NULL, + filetyp TEXT NOT NULL, + FOREIGN KEY (elsa_id) REFERENCES elsa (id) + ); + +CREATE TABLE IF NOT EXISTS elsa_media ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + work_author TEXT, + section_author TEXT, + year TEXT, + edition TEXT, + work_title TEXT, + chapter_title TEXT, + location TEXT, + publisher TEXT, + signature TEXT, + issue TEXT, + pages TEXT, + isbn TEXT, + type TEXT, + elsa_id INTEGER NOT NULL, + FOREIGN KEY (elsa_id) REFERENCES elsa (id) + ); + +CREATE TABLE IF NOT EXISTS neweditions ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + new_bookdata TEXT, + old_edition_id INTEGER, + for_apparat INTEGER, + ordered BOOLEAN DEFAULT (0), + FOREIGN KEY (old_edition_id) 
REFERENCES media (id), + FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id) +); + +-- Helpful indices to speed up frequent lookups and joins +CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id); +CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted); +CREATE INDEX IF NOT EXISTS idx_media_available ON media(available); +CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at); +CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id); +CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr); + +COMMIT; diff --git a/src/database/migrations/V002__create_table_webadis_login.sql b/src/database/migrations/V002__create_table_webadis_login.sql new file mode 100644 index 0000000..5e1b3a8 --- /dev/null +++ b/src/database/migrations/V002__create_table_webadis_login.sql @@ -0,0 +1,10 @@ +BEGIN TRANSACTION; + +CREATE TABLE IF NOT EXISTS webadis_login ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + username TEXT NOT NULL, + password TEXT NOT NULL + ); + +COMMIT; + diff --git a/src/database/migrations/V003_update_webadis_add_user_area.sql b/src/database/migrations/V003_update_webadis_add_user_area.sql new file mode 100644 index 0000000..1b5567b --- /dev/null +++ b/src/database/migrations/V003_update_webadis_add_user_area.sql @@ -0,0 +1,6 @@ +BEGIN TRANSACTION; + +ALTER TABLE webadis_login +ADD COLUMN effective_range TEXT; + +COMMIT; \ No newline at end of file diff --git a/src/database/schemas.py b/src/database/schemas.py new file mode 100644 index 0000000..991cb59 --- /dev/null +++ b/src/database/schemas.py @@ -0,0 +1,112 @@ +CREATE_TABLE_APPARAT = """CREATE TABLE semesterapparat ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT, + prof_id INTEGER, + fach TEXT, + appnr INTEGER, + erstellsemester TEXT, + verlängert_am TEXT, + dauer BOOLEAN, + verlängerung_bis TEXT, + deletion_status INTEGER, + deleted_date TEXT, + apparat_id_adis INTEGER, + prof_id_adis INTEGER, + konto INTEGER, + FOREIGN KEY (prof_id) REFERENCES prof (id) + )""" +CREATE_TABLE_MEDIA = """CREATE TABLE media ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + bookdata TEXT, + app_id INTEGER, + prof_id INTEGER, + deleted INTEGER DEFAULT (0), + available BOOLEAN, + reservation BOOLEAN, + FOREIGN KEY (prof_id) REFERENCES prof (id), + FOREIGN KEY (app_id) REFERENCES semesterapparat (id) + )""" + +CREATE_TABLE_FILES = """CREATE TABLE files ( + id INTEGER PRIMARY KEY, + filename TEXT, + fileblob BLOB, + app_id INTEGER, + filetyp TEXT, + prof_id INTEGER REFERENCES prof (id), + FOREIGN KEY (app_id) REFERENCES semesterapparat (id) + )""" +CREATE_TABLE_MESSAGES = """CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP, + message TEXT NOT NULL, + remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP, + user_id INTEGER NOT NULL, + appnr INTEGER, + FOREIGN KEY (user_id) REFERENCES user (id) + )""" +CREATE_TABLE_PROF = """CREATE TABLE prof ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + titel TEXT, + fname TEXT, + lname TEXT, + fullname TEXT NOT NULL UNIQUE, + mail TEXT, + telnr TEXT + )""" +CREATE_TABLE_USER = """CREATE TABLE user ( + id integer NOT NULL PRIMARY KEY AUTOINCREMENT, + created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + username TEXT NOT NULL UNIQUE, + password TEXT NOT NULL, + salt TEXT NOT NULL, + role TEXT NOT NULL, + email TEXT UNIQUE, + name TEXT + )""" +CREATE_TABLE_SUBJECTS = """CREATE TABLE subjects ( + id INTEGER PRIMARY KEY 
AUTOINCREMENT NOT NULL, + name TEXT NOT NULL UNIQUE +)""" + +CREATE_ELSA_TABLE = """CREATE TABLE elsa ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + date TEXT NOT NULL, + semester TEXT NOT NULL, + prof_id INTEGER NOT NULL + )""" +CREATE_ELSA_FILES_TABLE = """CREATE TABLE elsa_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + filename TEXT NOT NULL, + fileblob BLOB NOT NULL, + elsa_id INTEGER NOT NULL, + filetyp TEXT NOT NULL, + FOREIGN KEY (elsa_id) REFERENCES elsa (id) + )""" +CREATE_ELSA_MEDIA_TABLE = """CREATE TABLE elsa_media ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + work_author TEXT, + section_author TEXT, + year TEXT, + edition TEXT, + work_title TEXT, + chapter_title TEXT, + location TEXT, + publisher TEXT, + signature TEXT, + issue TEXT, + pages TEXT, + isbn TEXT, + type TEXT, + elsa_id INTEGER NOT NULL, + FOREIGN KEY (elsa_id) REFERENCES elsa (id) + )""" +CREATE_TABLE_NEWEDITIONS = """CREATE TABLE neweditions ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + new_bookdata TEXT, + old_edition_id INTEGER, + for_apparat INTEGER, + ordered BOOLEAN DEFAULT (0), + FOREIGN KEY (old_edition_id) REFERENCES media (id), + FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id) +)""" diff --git a/src/documents/__init__.py b/src/documents/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/src/documents/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/src/documents/generators.py b/src/documents/generators.py new file mode 100644 index 0000000..bd163cd --- /dev/null +++ b/src/documents/generators.py @@ -0,0 +1,371 @@ +import os +from datetime import datetime +from os.path import basename + +from docx import Document +from docx.enum.text import WD_PARAGRAPH_ALIGNMENT +from docx.oxml import OxmlElement +from docx.oxml.ns import qn +from docx.shared import Cm, Pt, RGBColor + +from src import settings +from src.shared.logging import log + +logger = log + +font = "Cascadia Mono" + + +def print_document(file: str) -> None: + # send document to printer as attachment of email + import smtplib + from email.mime.application import MIMEApplication + from email.mime.multipart import MIMEMultipart + from email.mime.text import MIMEText + + smtp = settings.mail.smtp_server + port = settings.mail.port + sender_email = settings.mail.sender + password = settings.mail.password + receiver = settings.mail.printer_mail + message = MIMEMultipart() + message["From"] = sender_email + message["To"] = receiver + message["cc"] = settings.mail.sender + message["Subject"] = "." + mail_body = "." 
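+    # Subject and body are intentionally minimal placeholders: the mail exists
+    # only to carry the document as an attachment to the print server address.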
+ message.attach(MIMEText(mail_body, "html")) + with open(file, "rb") as fil: + part = MIMEApplication(fil.read(), Name=basename(file)) + # After the file is closed + part["Content-Disposition"] = 'attachment; filename="%s"' % basename(file) + message.attach(part) + mail = message.as_string() + with smtplib.SMTP_SSL(smtp, port) as server: + server.connect(smtp, port) + server.login(settings.mail.user_name, password) + server.sendmail(sender_email, receiver, mail) + server.quit() + log.success("Mail sent") + + +class SemesterError(Exception): + """Custom exception for semester-related errors.""" + + def __init__(self, message: str): + super().__init__(message) + log.error(message) + + def __str__(self): + return f"SemesterError: {self.args[0]}" + + +class SemesterDocument: + def __init__( + self, + apparats: list[tuple[int, str]], + semester: str, + filename: str, + full: bool = False, + ): + assert isinstance(apparats, list), SemesterError( + "Apparats must be a list of tuples" + ) + assert all(isinstance(apparat, tuple) for apparat in apparats), SemesterError( + "Apparats must be a list of tuples" + ) + assert all(isinstance(apparat[0], int) for apparat in apparats), SemesterError( + "Apparat numbers must be integers" + ) + assert all(isinstance(apparat[1], str) for apparat in apparats), SemesterError( + "Apparat names must be strings" + ) + assert isinstance(semester, str), SemesterError("Semester must be a string") + assert "." not in filename and isinstance(filename, str), SemesterError( + "Filename must be a string and not contain an extension" + ) + self.doc = Document() + self.apparats = apparats + self.semester = semester + self.table_font_normal = font + self.table_font_bold = font + self.header_font = font + self.header_font_size = Pt(26) + self.sub_header_font_size = Pt(18) + self.table_font_size = Pt(10) + self.color_red = RGBColor(255, 0, 0) + self.color_blue = RGBColor(0, 0, 255) + self.filename = filename + if full: + log.info("Full document generation") + self.cleanup + log.info("Cleanup done") + self.make_document() + log.info("Document created") + self.create_pdf() + log.info("PDF created") + print_document(self.filename + ".pdf") + log.info("Document printed") + + def set_table_border(self, table): + """ + Adds a full border to the table. + + :param table: Table object to which the border will be applied. 
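+        :returns: None. The table's underlying XML (w:tblPr) is mutated in place.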
+ """ + tbl = table._element + tbl_pr = tbl.xpath("w:tblPr")[0] + tbl_borders = OxmlElement("w:tblBorders") + + # Define border styles + for border_name in ["top", "left", "bottom", "right", "insideH", "insideV"]: + border = OxmlElement(f"w:{border_name}") + border.set(qn("w:val"), "single") + border.set(qn("w:sz"), "4") # Thickness of the border + border.set(qn("w:space"), "0") + border.set(qn("w:color"), "000000") # Black color + tbl_borders.append(border) + + tbl_pr.append(tbl_borders) + + def create_sorted_table(self) -> None: + # Sort the apparats list by the string in the tuple (index 1) + self.apparats.sort(key=lambda x: x[1]) + # Create a table with rows equal to the length of the apparats list + table = self.doc.add_table(rows=len(self.apparats), cols=2) + table.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + + # Set column widths by directly modifying the cell properties + widths = [Cm(1.19), Cm(10)] + for col_idx, width in enumerate(widths): + for cell in table.columns[col_idx].cells: + cell_width_element = cell._element.xpath(".//w:tcPr")[0] + tcW = OxmlElement("w:tcW") + tcW.set(qn("w:w"), str(int(width.cm * 567))) # Convert cm to twips + tcW.set(qn("w:type"), "dxa") + cell_width_element.append(tcW) + + # Adjust row heights + for row in table.rows: + trPr = row._tr.get_or_add_trPr() # Get or add the element + trHeight = OxmlElement("w:trHeight") + trHeight.set( + qn("w:val"), str(int(Pt(15).pt * 20)) + ) # Convert points to twips + trHeight.set(qn("w:hRule"), "exact") # Use "exact" for fixed height + trPr.append(trHeight) + + # Fill the table with sorted data + for row_idx, (number, name) in enumerate(self.apparats): + row = table.rows[row_idx] + + # Set font for the first column (number) + cell_number_paragraph = row.cells[0].paragraphs[0] + cell_number_run = cell_number_paragraph.add_run(str(number)) + cell_number_run.font.name = self.table_font_bold + cell_number_run.font.size = self.table_font_size + cell_number_run.font.bold = True + cell_number_run.font.color.rgb = self.color_red + cell_number_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + + # Set font for the second column (name) + cell_name_paragraph = row.cells[1].paragraphs[0] + words = name.split() + if words: + # Add the first word in bold + bold_run = cell_name_paragraph.add_run(words[0]) + bold_run.font.bold = True + bold_run.font.name = self.table_font_bold + bold_run.font.size = self.table_font_size + + # Add the rest of the words normally + if len(words) > 1: + normal_run = cell_name_paragraph.add_run(" " + " ".join(words[1:])) + normal_run.font.name = self.table_font_normal + normal_run.font.size = self.table_font_size + cell_name_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT + + self.set_table_border(table) + + def make_document(self): + # Create a new Document + section = self.doc.sections[0] + section.top_margin = Cm(2.54) # Default 1 inch (can adjust as needed) + section.bottom_margin = Cm(1.5) # Set bottom margin to 1.5 cm + section.left_margin = Cm(2.54) # Default 1 inch + section.right_margin = Cm(2.54) # Default 1 inch + + # Add the current date + current_date = datetime.now().strftime("%Y-%m-%d") + date_paragraph = self.doc.add_paragraph(current_date) + date_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT + + # Add a header + semester = f"Semesterapparate {self.semester}" + header = self.doc.add_paragraph(semester) + header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + header_run = header.runs[0] + header_run.font.name = self.header_font + header_run.font.size = self.header_font_size + 
header_run.font.bold = True + header_run.font.color.rgb = self.color_blue + + sub_header = self.doc.add_paragraph("(Alphabetisch)") + sub_header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + sub_header_run = sub_header.runs[0] + sub_header_run.font.name = self.header_font + sub_header_run.font.size = self.sub_header_font_size + sub_header_run.font.color.rgb = self.color_red + + self.doc.add_paragraph("") + + self.create_sorted_table() + + def save_document(self, name: str) -> None: + # Save the document + self.doc.save(name) + + def create_pdf(self) -> None: + # Save the document + import comtypes.client + + word = comtypes.client.CreateObject("Word.Application") # type: ignore + self.save_document(self.filename + ".docx") + docpath = os.path.abspath(self.filename + ".docx") + doc = word.Documents.Open(docpath) + curdir = os.getcwd() + doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17) + doc.Close() + word.Quit() + log.debug("PDF saved") + + @property + def cleanup(self) -> None: + if os.path.exists(f"{self.filename}.docx"): + os.remove(f"{self.filename}.docx") + os.remove(f"{self.filename}.pdf") + + @property + def send(self) -> None: + print_document(self.filename + ".pdf") + log.debug("Document sent to printer") + + +class SemapSchilder: + def __init__(self, entries: list[str]): + self.entries = entries + self.filename = "Schilder" + self.font_size = Pt(23) + self.font_name = font + self.doc = Document() + self.define_doc_properties() + self.add_entries() + self.cleanup() + self.create_pdf() + + def define_doc_properties(self): + # set the doc to have a top margin of 1cm, left and right are 0.5cm, bottom is 0cm + section = self.doc.sections[0] + section.top_margin = Cm(1) + section.bottom_margin = Cm(0) + section.left_margin = Cm(0.5) + section.right_margin = Cm(0.5) + + # set the font to Times New Roman, size 23 bold, color black + for paragraph in self.doc.paragraphs: + for run in paragraph.runs: + run.font.name = self.font_name + run.font.size = self.font_size + run.font.bold = True + run.font.color.rgb = RGBColor(0, 0, 0) + paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + + # if the length of the text is + + def add_entries(self): + for entry in self.entries: + paragraph = self.doc.add_paragraph(entry) + paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + paragraph.paragraph_format.line_spacing = Pt(23) # Set fixed line spacing + paragraph.paragraph_format.space_before = Pt(2) # Remove spacing before + paragraph.paragraph_format.space_after = Pt(2) # Remove spacing after + + run = paragraph.runs[0] + run.font.name = self.font_name + run.font.size = self.font_size + run.font.bold = True + run.font.color.rgb = RGBColor(0, 0, 0) + + # Add a line to be used as a guideline for cutting + line = self.doc.add_paragraph() + line.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER + line.paragraph_format.line_spacing = Pt(23) # Match line spacing + line.paragraph_format.space_before = Pt(2) # Remove spacing before + line.paragraph_format.space_after = Pt(2) # Remove spacing after + line.add_run("--------------------------") + + def save_document(self): + # Save the document + self.doc.save(f"{self.filename}.docx") + log.debug(f"Document saved as {self.filename}.docx") + + def create_pdf(self) -> None: + # Save the document + import comtypes.client + + word = comtypes.client.CreateObject("Word.Application") # type: ignore + self.save_document() + docpath = os.path.abspath(f"{self.filename}.docx") + doc = word.Documents.Open(docpath) + curdir = os.getcwd() + 
doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17) + doc.Close() + word.Quit() + log.debug("PDF saved") + + def cleanup(self) -> None: + if os.path.exists(f"{self.filename}.docx"): + os.remove(f"{self.filename}.docx") + if os.path.exists(f"{self.filename}.pdf"): + os.remove(f"{self.filename}.pdf") + + @property + def send(self) -> None: + print_document(self.filename + ".pdf") + log.debug("Document sent to printer") + + +if __name__ == "__main__": + entries = [ + "Lüsebrink (Theorie und Praxis der Leichtathletik)", + "Kulovics (ISP-Betreuung)", + "Köhler (Ausgewählte Aspekte der materiellen Kultur Textil)", + "Grau (Young Adult Literature)", + "Schiebel (Bewegung II:Ausgewählte Problemfelder)", + "Schiebel (Ernährungswiss. Perspektive)", + "Park (Kommunikation und Kooperation)", + "Schiebel (Schwimmen)", + "Huppertz (Philosophieren mit Kindern)", + "Heyl (Heyl)", + "Reuter (Verschiedene Veranstaltungen)", + "Reinhold (Arithmetik und mathematisches Denken)", + "Wirtz (Forschungsmethoden)", + "Schleider (Essstörungen)", + "Schleider (Klinische Psychologie)", + "Schleider (Doktorandenkolloquium)", + "Schleider (Störungen Sozialverhaltens/Delinquenz)", + "Burth (EU Forschung im Int. Vergleich/EU Gegenstand biling. Didaktik)", + "Reinhardt (Einführung Politikdidaktik)", + "Schleider (Psychologische Interventionsmethoden)", + "Schleider (ADHS)", + "Schleider (Beratung und Teamarbeit)", + "Schleider (LRS)", + "Schleider (Gesundheitspsychologie)", + "Schleider (Elterntraining)", + "Wulff (Hochschulzertifikat DaZ)", + "Dinkelaker ( )", + "Droll (Einführung in die Sprachwissenschaft)", + "Karoß (Gymnastik - Sich Bewegen mit und ohne Handgeräte)", + "Sahrai (Kindheit und Gesellschaft)", + ] + doc = SemapSchilder(entries) diff --git a/src/parsers/__init__.py b/src/parsers/__init__.py new file mode 100644 index 0000000..bbe28e3 --- /dev/null +++ b/src/parsers/__init__.py @@ -0,0 +1,13 @@ +__all__ = [ + "csv_to_list", + "pdf_to_csv", + "word_to_semap", + "eml_parser", + "eml_to_semap", +] + + +from .csv_parser import csv_to_list +from .pdf_parser import pdf_to_csv +from .word_parser import word_to_semap +from .xml_parser import eml_parser, eml_to_semap diff --git a/src/parsers/csv_parser.py b/src/parsers/csv_parser.py new file mode 100644 index 0000000..750d8e8 --- /dev/null +++ b/src/parsers/csv_parser.py @@ -0,0 +1,23 @@ +import csv + +from charset_normalizer import detect + + +def csv_to_list(path: str) -> list[str]: + """ + Extracts the data from a csv file and returns it as a pandas dataframe + """ + encoding = detect(open(path, "rb").read())["encoding"] + with open(path, newline="", encoding=encoding) as csvfile: + # if decoder fails to map, assign "" + reader = csv.reader(csvfile, delimiter=";", quotechar="|") + ret = [] + for row in reader: + ret.append(row[0].replace('"', "")) + return ret + + +if __name__ == "__main__": + text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv") + # remove linebreaks + # #print(text) diff --git a/src/parsers/pdf_parser.py b/src/parsers/pdf_parser.py new file mode 100644 index 0000000..de5e87a --- /dev/null +++ b/src/parsers/pdf_parser.py @@ -0,0 +1,23 @@ +# add depend path to system path + +from pdfquery import PDFQuery + + +def pdf_to_csv(path: str) -> str: + """ + Extracts the data from a pdf file and returns it as a pandas dataframe + """ + file = PDFQuery(path) + file.load() + # get the text from the pdf file + text_elems = file.extract([("with_formatter", "text"), ("all_text", "*")]) + extracted_text = text_elems["all_text"] + + return 
extracted_text + + +if __name__ == "__main__": + text = pdf_to_csv("54_pdf.pdf") + # remove linebreaks + text = text.replace("\n", "") + # print(text) diff --git a/src/parsers/transformers/__init__.py b/src/parsers/transformers/__init__.py new file mode 100644 index 0000000..96f8070 --- /dev/null +++ b/src/parsers/transformers/__init__.py @@ -0,0 +1,8 @@ +from .transformers import ( + RDS_AVAIL_DATA, + ARRAYData, + BibTeXData, + COinSData, + RDSData, + RISData, +) diff --git a/src/parsers/transformers/schemas.py b/src/parsers/transformers/schemas.py new file mode 100644 index 0000000..cc479ea --- /dev/null +++ b/src/parsers/transformers/schemas.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from typing import Optional, Any, List +from dataclasses import dataclass +from dataclasses import field as dataclass_field +import json + + +@dataclass +class Item: + superlocation: str | None = dataclass_field(default_factory=str) + status: str | None = dataclass_field(default_factory=str) + availability: str | None = dataclass_field(default_factory=str) + notes: str | None = dataclass_field(default_factory=str) + limitation: str | None = dataclass_field(default_factory=str) + duedate: str | None = dataclass_field(default_factory=str) + id: str | None = dataclass_field(default_factory=str) + item_id: str | None = dataclass_field(default_factory=str) + ilslink: str | None = dataclass_field(default_factory=str) + number: int | None = dataclass_field(default_factory=int) + barcode: str | None = dataclass_field(default_factory=str) + reserve: str | None = dataclass_field(default_factory=str) + callnumber: str | None = dataclass_field(default_factory=str) + department: str | None = dataclass_field(default_factory=str) + locationhref: str | None = dataclass_field(default_factory=str) + location: str | None = dataclass_field(default_factory=str) + + def from_dict(self, data: dict): + """Import data from dict""" + data = data["items"] + for entry in data: + for key, value in entry.items(): + setattr(self, key, value) + return self + + +@dataclass +class RDS_AVAIL_DATA: + """Class to store RDS availability data""" + + library_sigil: str = dataclass_field(default_factory=str) + items: List[Item] = dataclass_field(default_factory=list) + + def import_from_dict(self, data: str): + """Import data from dict""" + edata = json.loads(data) + # library sigil is first key + + self.library_sigil = str(list(edata.keys())[0]) + # get data from first key + edata = edata[self.library_sigil] + for location in edata: + item = Item(superlocation=location).from_dict(edata[location]) + + self.items.append(item) + return self + + +@dataclass +class RDS_DATA: + """Class to store RDS data""" + + RDS_SIGNATURE: str = dataclass_field(default_factory=str) + RDS_STATUS: str = dataclass_field(default_factory=str) + RDS_LOCATION: str = dataclass_field(default_factory=str) + RDS_URL: Any = dataclass_field(default_factory=str) + RDS_HINT: Any = dataclass_field(default_factory=str) + RDS_COMMENT: Any = dataclass_field(default_factory=str) + RDS_HOLDING: Any = dataclass_field(default_factory=str) + RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str) + RDS_INTERN: Any = dataclass_field(default_factory=str) + RDS_PROVENIENCE: Any = dataclass_field(default_factory=str) + RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str) + RDS_LEA: Any = dataclass_field(default_factory=str) + + def import_from_dict(self, data: dict) -> RDS_DATA: + """Import data from dict""" + for key, value in data.items(): + setattr(self, key, 
value)
+        return self
+
+
+@dataclass
+class RDS_GENERIC_DATA:
+    LibrarySigil: str = dataclass_field(default_factory=str)
+    RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list)
+
+    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
+        """Import data from dict"""
+        edata = json.loads(data)
+        # the library sigil is the first key
+        self.LibrarySigil = str(list(edata.keys())[0])
+        # get the data stored under that key
+        edata = edata[self.LibrarySigil]
+        for entry in edata:
+            rds_data = RDS_DATA()  # Create a new RDS_DATA instance
+            # Populate the RDS_DATA instance from the entry; this assumes the entry
+            # is a dictionary that matches the structure of the RDS_DATA class
+            rds_data.import_from_dict(entry)
+            self.RDS_DATA.append(rds_data)  # Add the RDS_DATA instance to the list
+        return self
+
+
+@dataclass
+class LoksatzData:
+    type: Optional[str] = None
+    adis_idn: Optional[str] = None
+    t_idn: Optional[str] = None
+    ktrl_nr: Optional[str] = None
+    adis_isil: Optional[str] = None
+    adis_sigel: Optional[str] = None
+    bib_sigel: Optional[str] = None
+    standort: Optional[str] = None
+    signatur: Optional[str] = None
+    ausleihcode: Optional[str] = None
+    sig_katalog: Optional[str] = None
+    erwerb_datum: Optional[str] = None
+    medientypcode: Optional[str] = None
+    bestellart: Optional[str] = None
+    faecherstatistik: Optional[str] = None
+    exemplar_stat: Optional[str] = None
+    so_standort: Optional[str] = None
diff --git a/src/parsers/transformers/transformers.py b/src/parsers/transformers/transformers.py
new file mode 100644
index 0000000..ade70b7
--- /dev/null
+++ b/src/parsers/transformers/transformers.py
@@ -0,0 +1,522 @@
+from __future__ import annotations
+
+import json
+import re
+import sys
+from dataclasses import dataclass
+from dataclasses import field as dataclass_field
+from typing import Any, List
+
+import loguru
+
+from src import LOG_DIR
+from src.core.models import BookData
+
+log = loguru.logger
+log.remove()
+log.add(sys.stdout, level="INFO")
+log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
+
+
+# Dataclass models (duplicated in src/parsers/transformers/schemas.py)
+@dataclass
+class Item:
+    superlocation: str | None = dataclass_field(default_factory=str)
+    status: str | None = dataclass_field(default_factory=str)
+    availability: str | None = dataclass_field(default_factory=str)
+    notes: str | None = dataclass_field(default_factory=str)
+    limitation: str | None = dataclass_field(default_factory=str)
+    duedate: str | None = dataclass_field(default_factory=str)
+    id: str | None = dataclass_field(default_factory=str)
+    item_id: str | None = dataclass_field(default_factory=str)
+    ilslink: str | None = dataclass_field(default_factory=str)
+    number: int | None = dataclass_field(default_factory=int)
+    barcode: str | None = dataclass_field(default_factory=str)
+    reserve: str | None = dataclass_field(default_factory=str)
+    callnumber: str | None = dataclass_field(default_factory=str)
+    department: str | None = dataclass_field(default_factory=str)
+    locationhref: str | None = dataclass_field(default_factory=str)
+    location: str | None = dataclass_field(default_factory=str)
+    ktrl_nr: str | None = dataclass_field(default_factory=str)
+
+    def from_dict(self, data: dict):
+        """Import data from dict"""
+        data = data["items"]
+        for entry in data:
+            for key, value in entry.items():
+                setattr(self, key, value)
+        return self
+
+
+@dataclass
+class RDS_AVAIL_DATA:
+    """Class to store RDS availability data"""
+
+    library_sigil: str = dataclass_field(default_factory=str)
+    items: List[Item] = 
dataclass_field(default_factory=list) + + def import_from_dict(self, data: str): + """Import data from dict""" + edata = json.loads(data) + # library sigil is first key + + self.library_sigil = str(list(edata.keys())[0]) + # get data from first key + edata = edata[self.library_sigil] + for location in edata: + item = Item(superlocation=location).from_dict(edata[location]) + + self.items.append(item) + return self + + +@dataclass +class RDS_DATA: + """Class to store RDS data""" + + RDS_SIGNATURE: str = dataclass_field(default_factory=str) + RDS_STATUS: str = dataclass_field(default_factory=str) + RDS_LOCATION: str = dataclass_field(default_factory=str) + RDS_URL: Any = dataclass_field(default_factory=str) + RDS_HINT: Any = dataclass_field(default_factory=str) + RDS_COMMENT: Any = dataclass_field(default_factory=str) + RDS_HOLDING: Any = dataclass_field(default_factory=str) + RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str) + RDS_INTERN: Any = dataclass_field(default_factory=str) + RDS_PROVENIENCE: Any = dataclass_field(default_factory=str) + RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str) + RDS_LEA: Any = dataclass_field(default_factory=str) + + def import_from_dict(self, data: dict) -> RDS_DATA: + """Import data from dict""" + for key, value in data.items(): + setattr(self, key, value) + return self + + +@dataclass +class RDS_GENERIC_DATA: + LibrarySigil: str = dataclass_field(default_factory=str) + RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list) + + def import_from_dict(self, data: str) -> RDS_GENERIC_DATA: + """Import data from dict""" + edata = json.loads(data) + # library sigil is first key + self.LibrarySigil = str(list(edata.keys())[0]) + # get data from first key + edata = edata[self.LibrarySigil] + for entry in edata: + rds_data = RDS_DATA() # Create a new RDS_DATA instance + # Populate the RDS_DATA instance from the entry + # This assumes that the entry is a dictionary that matches the structure of the RDS_DATA class + rds_data.import_from_dict(entry) + self.RDS_DATA.append(rds_data) # Add the RDS_DATA instance to the list + return self + + +class BaseStruct: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + +class ARRAYData: + def __init__(self, signature=None) -> None: + self.signature = None + pass + + def transform(self, data: str) -> BookData: + def _get_line(source: str, search: str) -> str: + try: + data = ( + source.split(search)[1] + .split("\n")[0] + .strip() + .replace("=>", "") + .strip() + ) + return data + + except Exception: + # # log.debug(f"ARRAYData.transform failed, {source}, {search}") + log.exception(f"ARRAYData.transform failed, no string {search}") + return "" + + def _get_list_entry(source: str, search: str, entry: str) -> str: + try: + source = source.replace("\t", "").replace("\r", "") + source = source.split(search)[1].split(")")[0] + return _get_line(source, entry).replace("=>", "").strip() + except: + return "" + + def _get_isbn(source: str) -> list: + try: + isbn = source.split("[isbn]")[1].split(")")[0].strip() + isbn = isbn.split("(")[1] + isbns = isbn.split("=>") + ret = [] + for _ in isbns: + # remove _ from list + isb = _.split("\n")[0].strip() + if isb == "": + continue + ret.append(isb) if isb not in ret else None + return ret + except: + isbn = [] + return isbn + + def _get_signature(data): + try: + sig_data = ( + data.split("[loksatz]")[1] + .split("[0] => ")[1] + .split("\n")[0] + .strip() + ) + signature_data = eval(sig_data) + return 
signature_data["signatur"] + except Exception: + return None + + def _get_author(data): + try: + array = data.split("[au_display_short]")[1].split(")\n")[0].strip() + except Exception: + return "" + entries = array.split("\n") + authors = [] + hg_present = False + verf_present = False + lines = [] + for entry in entries: + if "=>" in entry: + line = entry.split("=>")[1].strip() + if "[HerausgeberIn]" in line: + hg_present = True + if "[VerfasserIn]" in line: + verf_present = True + lines.append(line) + for line in lines: + if hg_present and verf_present: + if "[HerausgeberIn]" in line: + authors.append(line.split("[")[0].strip()) + elif verf_present: + if "[VerfasserIn]" in line: + authors.append(line.split("[")[0].strip()) + else: + pass + return ";".join(authors) + + def _get_title(data): + titledata = None + title = "" + if "[ti_long]" in data: + titledata = data.split("[ti_long]")[1].split(")\n")[0].strip() + title = titledata.split("=>")[1].strip().split("/")[0].strip() + if "[ti_long_f]" in data: + titledata = data.split("[ti_long_f]")[1].split(")\n")[0].strip() + title = titledata.split("=>")[1].strip().split("/")[0].strip() + return title + + def _get_adis_idn(data, signature): + loksatz_match = re.search( + r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL + ) + if loksatz_match: + loksatz_content = loksatz_match.group(1) + + # Step 2: Extract JSON objects within the loksatz section + json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL) + # Print each JSON object + for obj in json_objects: + data = eval(obj) + if data["signatur"] == signature: + return data["adis_idn"] + + def _get_in_apparat(data): + loksatz_match = re.search( + r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL + ) + if loksatz_match: + loksatz_content = loksatz_match.group(1) + + # Step 2: Extract JSON objects within the loksatz section + json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL) + # Print each JSON object + for obj in json_objects: + data = eval(obj) + if data["ausleihcode"] == "R" and data["standort"] == "40": + return True + else: + return False + + ppn = _get_line(data, "[kid]") + title = _get_title(data).strip() + author = _get_author(data) + edition = _get_list_entry(data, "[ausgabe]", "[0]").replace(",", "") + link = f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data, '[kid]')}" + isbn = _get_isbn(data) + # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")], + language = _get_list_entry(data, "[la_facet]", "[0]") + publisher = _get_list_entry(data, "[pu]", "[0]") + year = _get_list_entry(data, "[py_display]", "[0]") + pages = _get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip() + signature = ( + self.signature if self.signature is not None else _get_signature(data) + ) + + place = _get_list_entry(data, "[pp]", "[0]") + adis_idn = _get_adis_idn(data, signature=signature) + in_apparat = _get_in_apparat(data) + return BookData( + ppn=ppn, + title=title, + author=author, + edition=edition, + link=link, + isbn=isbn, + language=language, + publisher=publisher, + year=year, + pages=pages, + signature=signature, + place=place, + adis_idn=adis_idn, + in_apparat=in_apparat, + ) + + +class COinSData: + def __init__(self) -> None: + pass + + def transform(self, data: str) -> BookData: + def _get_line(source: str, search: str) -> str: + try: + data = source.split(f"{search}=")[1] # .split("")[0].strip() + return data.split("rft")[0].strip() if "rft" in data else data + except: + return "" + + return BookData( + 
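+            # Each rft.* getter slices the raw COinS query string, so these are
+            # best-effort extractions rather than validated fields.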
ppn=_get_line(data, "rft_id").split("=")[1], + title=_get_line(data, "rft.btitle"), + author=f"{_get_line(data, 'rft.aulast')}, {_get_line(data, 'rft.aufirst')}", + edition=_get_line(data, "rft.edition"), + link=_get_line(data, "rft_id"), + isbn=_get_line(data, "rft.isbn"), + publisher=_get_line(data, "rft.pub"), + year=_get_line(data, "rft.date"), + pages=_get_line(data, "rft.tpages").split(":")[0].strip(), + ) + + +class RISData: + def __init__(self) -> None: + pass + + def transform(self, data: str) -> BookData: + def _get_line(source: str, search: str) -> str: + try: + data = source.split(f"{search} - ")[1] # .split("")[0].strip() + return data.split("\n")[0].strip() if "\n" in data else data + except: + return "" + + return BookData( + ppn=_get_line(data, "DP").split("=")[1], + title=_get_line(data, "TI"), + signature=_get_line(data, "CN"), + edition=_get_line(data, "ET").replace(",", ""), + link=_get_line(data, "DP"), + isbn=_get_line(data, "SN").split(","), + author=_get_line(data, "AU").split("[")[0].strip(), + language=_get_line(data, "LA"), + publisher=_get_line(data, "PB"), + year=_get_line(data, "PY"), + pages=_get_line(data, "SP"), + ) + + +class BibTeXData: + def __init__(self): + pass + + def transform(self, data: str) -> BookData: + def _get_line(source: str, search: str) -> str: + try: + return ( + data.split(search)[1] + .split("\n")[0] + .strip() + .split("=")[1] + .strip() + .replace("{", "") + .replace("}", "") + .replace(",", "") + .replace("[", "") + .replace("];", "") + ) + except: + return "" + + return BookData( + ppn=None, + title=_get_line(data, "title"), + signature=_get_line(data, "bestand"), + edition=_get_line(data, "edition"), + isbn=_get_line(data, "isbn"), + author=";".join(_get_line(data, "author").split(" and ")), + language=_get_line(data, "language"), + publisher=_get_line(data, "publisher"), + year=_get_line(data, "year"), + pages=_get_line(data, "pages"), + ) + + +class RDSData: + def __init__(self): + self.retlist = [] + + def transform(self, data: str): + # rds_availability = RDS_AVAIL_DATA() + # rds_data = RDS_GENERIC_DATA() + print(data) + + def __get_raw_data(data: str) -> list: + # create base data to be turned into pydantic classes + data = data.split("RDS ----------------------------------")[1] + edata = data.strip() + edata = edata.split("\n", 9)[9] + edata = edata.split("\n")[1:] + entry_1 = edata[0] + edata = edata[1:] + entry_2 = "".join(edata) + edata = [] + edata.append(entry_1) + edata.append(entry_2) + return edata + + ret_data = __get_raw_data(data) + # assign data[1] to RDS_AVAIL_DATA + # assign data[0] to RDS_DATA + self.rds_data = RDS_GENERIC_DATA().import_from_dict(ret_data[1]) + self.rds_availability = RDS_AVAIL_DATA().import_from_dict(ret_data[0]) + self.retlist.append(self.rds_availability) + self.retlist.append(self.rds_data) + return self + + def return_data(self, option=None): + if option == "rds_availability": + return self.retlist[0] + elif option == "rds_data": + return self.retlist[1] + else: + return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]} + + +class DictToTable: + def __init__(self): + self.work_author = None + self.section_author = None + self.year = None + self.edition = None + self.work_title = None + self.chapter_title = None + self.location = None + self.publisher = None + self.signature = None + self.type = None + self.pages = None + self.issue = None + self.isbn = None + + def makeResult(self): + data = { + "work_author": self.work_author, + "section_author": self.section_author, + 
"year": self.year, + "edition": self.edition, + "work_title": self.work_title, + "chapter_title": self.chapter_title, + "location": self.location, + "publisher": self.publisher, + "signature": self.signature, + "issue": self.issue, + "pages": self.pages, + "isbn": self.isbn, + "type": self.type, + } + data = {k: v for k, v in data.items() if v is not None} + return data + + def reset(self): + for key in self.__dict__: + setattr(self, key, None) + + def transform(self, data: dict): + mode = data["mode"] + self.reset() + if mode == "book": + return self.book_assign(data) + elif mode == "hg": + return self.hg_assign(data) + elif mode == "zs": + return self.zs_assign(data) + else: + return None + + def book_assign(self, data): + self.type = "book" + self.work_author = data["book_author"] + self.signature = data["book_signature"] + self.location = data["book_place"] + self.year = data["book_year"] + self.work_title = data["book_title"] + self.edition = data["book_edition"] + self.pages = data["book_pages"] + self.publisher = data["book_publisher"] + self.isbn = data["book_isbn"] + return self.makeResult() + + def hg_assign(self, data): + self.type = "hg" + self.section_author = data["hg_author"] + self.work_author = data["hg_editor"] + self.year = data["hg_year"] + self.work_title = data["hg_title"] + self.publisher = data["hg_publisher"] + self.location = data["hg_place"] + self.edition = data["hg_edition"] + self.chapter_title = data["hg_chaptertitle"] + self.pages = data["hg_pages"] + self.signature = data["hg_signature"] + self.isbn = data["hg_isbn"] + return self.makeResult() + + def zs_assign(self, data): + self.type = "zs" + self.section_author = data["zs_author"] + self.chapter_title = data["zs_chapter_title"] + self.location = data["zs_place"] + self.issue = data["zs_issue"] + self.pages = data["zs_pages"] + self.publisher = data["zs_publisher"] + self.isbn = data["zs_isbn"] + + self.year = data["zs_year"] + self.signature = data["zs_signature"] + self.work_title = data["zs_title"] + return self.makeResult() + + +if __name__ == "__main__": + with open("daiadata", "r") as f: + data = f.read() + + ret = RDSData().transform(data) + data = ret.return_data("rds_availability") + # log.debug(data) diff --git a/src/parsers/word_parser.py b/src/parsers/word_parser.py new file mode 100644 index 0000000..44fb2d8 --- /dev/null +++ b/src/parsers/word_parser.py @@ -0,0 +1,373 @@ +import zipfile +from typing import Any, Optional + +import fitz # PyMuPDF +import pandas as pd +from bs4 import BeautifulSoup +from docx import Document + +from src.core.models import Book, SemapDocument +from src.shared.logging import log + + +def word_docx_to_csv(path: str) -> list[pd.DataFrame]: + doc = Document(path) + tables = doc.tables + m_data = [] + for table in tables: + data = [] + for row in table.rows: + row_data: list[Any] = [] + for cell in row.cells: + text = cell.text + + text = text.replace("\n", "") + row_data.append(text) + # if text == "Ihr Fach:": + # row_data.append(get_fach(path)) + data.append(row_data) + df = pd.DataFrame(data) + df.columns = df.iloc[0] + df = df.iloc[1:] + + m_data.append(df) + + return m_data + + +def get_fach(path: str) -> Optional[str]: + document = zipfile.ZipFile(path) + xml_data = document.read("word/document.xml") + document.close() + + soup = BeautifulSoup(xml_data, "xml") + # text we need is in -> w:r -> w:t + paragraphs = soup.find_all("w:p") + for para in paragraphs: + para_id = para.get("w14:paraId") + if para_id == "12456A32": + # get the data in the w:t + for run in 
para.find_all("w:r"): + data = run.find("w:t") + if data and data.contents: + return data.contents[0] + return None + + +def makeDict() -> dict[str, Optional[str]]: + return { + "work_author": None, + "section_author": None, + "year": None, + "edition": None, + "work_title": None, + "chapter_title": None, + "location": None, + "publisher": None, + "signature": None, + "issue": None, + "pages": None, + "isbn": None, + "type": None, + } + + +def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]: + ret: list[dict[str, Optional[str]]] = [] + for line in tlist: + data = makeDict() + if type == "Monografien": + data["type"] = type + data["work_author"] = line[0] + data["year"] = line[1] + data["edition"] = line[2] + data["work_title"] = line[3] + data["location"] = line[4] + data["publisher"] = line[5] + data["signature"] = line[6] + data["pages"] = line[7] + elif type == "Herausgeberwerke": + data["type"] = type + data["section_author"] = line[0] + data["year"] = line[1] + data["edition"] = line[2] + data["chapter_title"] = line[3] + data["work_author"] = line[4] + data["work_title"] = line[5] + data["location"] = line[6] + data["publisher"] = line[7] + data["signature"] = line[9] + data["pages"] = line[8] + elif type == "Zeitschriftenaufsätze": + data["type"] = type + data["section_author"] = line[0] + data["year"] = line[1] + data["issue"] = line[2] + data["chapter_title"] = line[3] + data["work_title"] = line[4] + data["location"] = line[5] + data["publisher"] = line[6] + data["signature"] = line[8] + data["pages"] = line[7] + ret.append(data) + return ret + + +def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]: + doc = Document(path) + # # print all lines in doc + doctype = [para.text for para in doc.paragraphs if para.text != ""][-1] + tuples = { + "Monografien": ("", "", "", "", "", "", "", "", ""), + "Herausgeberwerke": ("", "", "", "", "", "", "", "", "", "", ""), + "Zeitschriftenaufsätze": ("", "", "", "", "", "", "", "", "", ""), + } + tables = doc.tables + + m_data: list[pd.DataFrame] = [] + for table in tables: + data: list[list[str]] = [] + for row in table.rows: + row_data: list[str] = [] + for cell in row.cells: + text = cell.text + text = text.replace("\n", "") + text = text.replace("\u2002", "") + row_data.append(text) + data.append(row_data) + df = pd.DataFrame(data) + df.columns = df.iloc[0] + df = df.iloc[1:] + m_data.append(df) + df = m_data[0] + # split df to rows + data = [ + row for row in df.itertuples(index=False, name=None) if row != tuples[doctype] + ] + # log.debug(data) + return tuple_to_dict(data, doctype), doctype + + +def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument: + log.info("Parsing Word Document {}", word_path) + semap = SemapDocument() + df = word_docx_to_csv(word_path) + apparatdata = df[0] + apparatdata = apparatdata.to_dict() + keys = list(apparatdata.keys()) + # print(apparatdata, keys) + + appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)} + semap.phoneNumber = appdata["Telefon:"] + semap.subject = appdata["Ihr Fach:"] + semap.mail = appdata["Mailadresse:"] + semap.personName = ",".join(appdata["Ihr Name und Titel:"].split(",")[:-1]) + semap.personTitle = ",".join(appdata["Ihr Name und Titel:"].split(",")[-1:]).strip() + apparatdata = df[1] + apparatdata = apparatdata.to_dict() + keys = list(apparatdata.keys()) + appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)} + semap.title = appdata["Veranstaltung:"] + semap.semester = appdata["Semester:"] + if ai: 
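+        # NOTE: the bare property accesses below run for their side effects;
+        # they appear to normalize the semester string and split name/title
+        # (compare pdf_to_semap, which does the same via `_ = semap.renameSemester`).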
+ semap.renameSemester + semap.nameSetter + + books = df[2] + booklist = [] + for i in range(len(books)): + if books.iloc[i].isnull().all(): + continue + data = books.iloc[i].to_dict() + book = Book() + book.from_dict(data) + if book.is_empty: + continue + elif not book.has_signature: + continue + else: + booklist.append(book) + log.info("Found {} books", len(booklist)) + semap.books = booklist + return semap + + +def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument: + """ + Parse a Semesterapparat PDF like the sample you provided and return a SemapDocument. + - No external programs, only PyMuPDF. + - Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells. + - Works across multiple pages; headers only need to exist on the first page. + """ + doc = fitz.open(pdf_path) + semap = SemapDocument() + + # ---------- helpers ---------- + def _join_tokens(tokens: list[str]) -> str: + """Join tokens, preserving hyphen/URL joins across line wraps.""" + parts = [] + for tok in tokens: + if parts and ( + parts[-1].endswith("-") + or parts[-1].endswith("/") + or parts[-1].endswith(":") + ): + parts[-1] = parts[-1] + tok # no space after '-', '/' or ':' + else: + parts.append(tok) + return " ".join(parts).strip() + + def _extract_row_values_multiline( + page, labels: list[str], y_window: float = 24 + ) -> dict[str, str]: + """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label.""" + rects = [] + for lab in labels: + hits = page.search_for(lab) + if hits: + rects.append((lab, hits[0])) + if not rects: + return {} + + rects.sort(key=lambda t: t[1].x0) + words = page.get_text("words") + out = {} + for i, (lab, r) in enumerate(rects): + x0 = r.x1 + 1 + x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5 + y0 = r.y0 - 3 + y1 = r.y0 + y_window + toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1] + toks.sort(key=lambda w: (w[1], w[0])) # line, then x + out[lab] = _join_tokens([w[4] for w in toks]) + return out + + def _compute_columns_from_headers(page0): + """Find column headers (once) and derive column centers + header baseline.""" + headers = [ + ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"), + ("Jahr/Auflage", "Jahr/Auflage"), + ("Titel", "Titel"), + ("Ort und Verlag", "Ort und Verlag"), + ("Standnummer", "Standnummer"), + ("Interne Vermerke", "Interne Vermerke"), + ] + found = [] + for label, canon in headers: + rects = [ + r for r in page0.search_for(label) if r.y0 > 200 + ] # skip top-of-form duplicates + if rects: + found.append((canon, rects[0])) + found.sort(key=lambda t: t[1].x0) + cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found] + header_y = min(r.y0 for _, r in found) if found else 0 + return cols, header_y + + def _extract_table_rows_from_page( + page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0 + ): + """ + Group words into logical rows (tolerant to wrapped lines), then map each word + to the nearest column by x-center and join tokens per column. 
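+
+        Illustration (sketch values, not from a real form): with y_tol=26.0,
+        words at y=100 and y=112 join one band (|100 - 112| <= 26, running
+        mean 106), while a word at y=150 opens a new row (|106 - 150| > 26).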
+ """ + words = [ + w + for w in page.get_text("words") + if w[1] > header_y + y_top_margin + and w[3] < page.rect.height - y_bottom_margin + ] + + # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap) + rows = [] + for w in sorted(words, key=lambda w: w[1]): + y = w[1] + for row in rows: + if abs(row["y_mean"] - y) <= y_tol: + row["ys"].append(y) + row["y_mean"] = sum(row["ys"]) / len(row["ys"]) + row["words"].append(w) + break + else: + rows.append({"y_mean": y, "ys": [y], "words": [w]}) + + # map to columns + join + joined_rows = [] + for row in rows: + rowdict = {canon: "" for canon, *_ in cols} + words_by_col = {canon: [] for canon, *_ in cols} + for w in sorted(row["words"], key=lambda w: (w[1], w[0])): + xmid = (w[0] + w[2]) / 2.0 + canon = min(cols, key=lambda c: abs(xmid - c[3]))[0] + words_by_col[canon].append(w[4]) + for canon, toks in words_by_col.items(): + rowdict[canon] = _join_tokens(toks) + if any(v for v in rowdict.values()): + joined_rows.append(rowdict) + return joined_rows + + # ---------- top-of-form fields ---------- + p0 = doc[0] + row1 = _extract_row_values_multiline( + p0, + ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"], + y_window=22, + ) + row2 = _extract_row_values_multiline( + p0, ["Veranstaltung:", "Semester:"], y_window=20 + ) + + name_title = row1.get("Ihr Name und Titel:", "") or "" + semap.subject = row1.get("Ihr Fach:", None) + semap.phoneNumber = row1.get("Telefon:", None) # keep as-is (string like "682-308") + semap.mail = row1.get("Mailadresse:", None) + semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None + semap.personTitle = ( + ",".join(name_title.split(",")[-1:]).strip() if name_title else None + ) + + semap.title = row2.get("Veranstaltung:", None) + semap.semester = row2.get("Semester:", None) + + # ---------- table extraction (all pages) ---------- + cols, header_y = _compute_columns_from_headers(p0) + all_rows: list[dict[str, Any]] = [] + for pn in range(len(doc)): + all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y)) + + # drop the sub-header line "Nachname, Vorname" etc. + filtered = [] + for r in all_rows: + if r.get("Autorenname(n):Nachname, Vorname", "").strip() in ( + "", + "Nachname, Vorname", + ): + # skip if it's just the sub-header line + if all(not r[c] for c in r if c != "Autorenname(n):Nachname, Vorname"): + continue + filtered.append(r) + + # build Book objects (same filters as your word parser) + booklist: list[Book] = [] + for row in filtered: + b = Book() + b.from_dict(row) + if b.is_empty: + continue + if not b.has_signature: + continue + booklist.append(b) + + semap.books = booklist + + # keep parity with your post-processing + if ai: + _ = semap.renameSemester + _ = semap.nameSetter + + return semap + + +if __name__ == "__main__": + else_df = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf") + # print(else_df) diff --git a/src/parsers/xml_parser.py b/src/parsers/xml_parser.py new file mode 100644 index 0000000..90ffdb0 --- /dev/null +++ b/src/parsers/xml_parser.py @@ -0,0 +1,67 @@ +import xml.etree.ElementTree as ET + +from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission +from src.core.semester import Semester + + +def parse_xml_submission(xml_string: str) -> XMLMailSubmission: + """ + Parse an XML string representing a mail submission and return an XMLMailSubmission object. 
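+
+    Expected shape (a sketch inferred from the tag names read below; the root
+    tag name itself is not checked, and all values are placeholders):
+
+        <submission>
+          <static>
+            <name>Ada</name><lastname>Lovelace</lastname><title>Prof.</title>
+            <telno>123456</telno><mail>ada@example.org</mail>
+            <apparatsname>Mathematik I</apparatsname><subject>Mathematik</subject>
+            <semester>WiSe 2024</semester>
+          </static>
+          <books>
+            <book>
+              <authorname>Knuth, Donald</authorname><year>2020/4</year>
+              <title>TAOCP</title><signature>ABC 123</signature>
+            </book>
+          </books>
+        </submission>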
+ """ + submission = XMLMailSubmission() + root = ET.fromstring(xml_string) + static_data = root.find("static") + static_info = {child.tag: child.text for child in static_data} + books = root.find("books") + books_info = [] + for book in books: + book_details = {detail.tag: detail.text for detail in book} + book = BookData( + author=book_details.get("authorname"), + year=book_details.get("year").split("/")[0] + if "/" in book_details.get("year") + else book_details.get("year"), + edition=book_details.get("year").split("/")[1] + if "/" in book_details.get("year") + else None, + title=book_details.get("title"), + signature=book_details.get("signature"), + ) + books_info.append(book) + # Extract static data + submission.name = static_info.get("name") + submission.lastname = static_info.get("lastname") + submission.title = static_info.get("title") + submission.telno = int(static_info.get("telno")) + submission.email = static_info.get("mail") + submission.app_name = static_info.get("apparatsname") + submission.subject = static_info.get("subject") + sem_year = static_info.get("semester").split()[1] + sem_term = static_info.get("semester").split()[0] + submission.semester = Semester(semester=sem_term, year=int(sem_year)) + submission.books = books_info + # Extract book information + # book_info = [] + # for book in books: + # book_details = {detail.tag: detail.text for detail in book} + # book_info.append(book_details) + return submission + + +def eml_parser(path: str) -> XMLMailSubmission: + with open(path, "r", encoding="utf-8") as file: + xml_content = file.read().split("\n\n", 1)[1] # Skip headers + print("EML content loaded, parsing XML...") + print(xml_content) + return parse_xml_submission(xml_content) + + +def eml_to_semap(xml_mail: XMLMailSubmission) -> SemapDocument: + submission = eml_parser(xml_mail) + semap_doc = SemapDocument( + # prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email), + apparat=Apparat(name=submission.app_name, subject=submission.subject), + semester=submission.semester, + books=submission.books, + ) + return semap_doc diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000..dec0a5f --- /dev/null +++ b/src/services/__init__.py @@ -0,0 +1,16 @@ +"""External service integrations and API clients.""" + +from .catalogue import Catalogue +from .sru import SWB +from .lehmanns import LehmannsClient +from .zotero import ZoteroController +from .webrequest import BibTextTransformer, WebRequest + +__all__ = [ + "Catalogue", + "SWB", + "LehmannsClient", + "ZoteroController", + "BibTextTransformer", + "WebRequest", +] diff --git a/src/services/catalogue.py b/src/services/catalogue.py new file mode 100644 index 0000000..ff3b306 --- /dev/null +++ b/src/services/catalogue.py @@ -0,0 +1,292 @@ +from typing import List + +import regex +import requests +from bs4 import BeautifulSoup + +from src.core.models import BookData as Book +from src.shared.logging import log + +URL = 
"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND" +BASE = "https://rds.ibs-bw.de" + + +class Catalogue: + def __init__(self, timeout=15): + self.timeout = timeout + reachable = self.check_connection() + if not reachable: + log.error("No internet connection available.") + raise ConnectionError("No internet connection available.") + + def check_connection(self): + try: + response = requests.get("https://www.google.com", timeout=self.timeout) + if response.status_code == 200: + return True + except requests.exceptions.RequestException as e: + log.error(f"Could not connect to google.com: {e}") + + def search_book(self, searchterm: str): + response = requests.get(URL.format(searchterm), timeout=self.timeout) + return response.text + + def search(self, link: str): + response = requests.get(link, timeout=self.timeout) + return response.text + + def get_book_links(self, searchterm: str) -> List[str]: + response = self.search_book(searchterm) + soup = BeautifulSoup(response, "html.parser") + links = soup.find_all("a", class_="title getFull") + res: List[str] = [] + for link in links: + res.append(BASE + link["href"]) # type: ignore + return res + + def get_book(self, searchterm: str): + log.info(f"Searching for term: {searchterm}") + + links = self.get_book_links(searchterm) + print(links) + for elink in links: + result = self.search(elink) + # in result search for class col-xs-12 rds-dl RDS_LOCATION + # if found, return text of href + soup = BeautifulSoup(result, "html.parser") + + # Optional (unchanged): title and ppn if you need them + title_el = soup.find("div", class_="headline text") + title = title_el.get_text(strip=True) if title_el else None + + ppn_el = soup.find( + "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN" + ) + # in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel + ppn = ( + ppn_el.find_next_sibling( + "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel" + ).get_text(strip=True) + if ppn_el + else None + ) + + # get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION + edition_el = soup.find( + "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION" + ) + edition = ( + edition_el.find_next_sibling( + "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel" + ).get_text(strip=True) + if edition_el + else None + ) + + authors = soup.find_all( + "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON" + ) + author = None + if authors: + # get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel + author_names = [] + for author in authors: + panel = author.find_next_sibling( + "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel" + ) + if panel: + links = panel.find_all("a") + for link in links: + author_names.append(link.text.strip()) + 
author = ( + ";".join(author_names) if len(author_names) > 1 else author_names[0] + ) + signature = None + + panel = soup.select_one("div.panel-body") + if panel: + # Collect the RDS_* blocks in order, using the 'space' divs as separators + groups = [] + cur = {} + for node in panel.select( + "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space" + ): + classes = node.get("class", []) + # Separator between entries + if "space" in classes: + if cur: + groups.append(cur) + cur = {} + continue + + # Read the value from the corresponding panel cell + val_el = node.select_one(".rds-dl-panel") + val = ( + val_el.get_text(" ", strip=True) + if val_el + else node.get_text(" ", strip=True) + ) + + if "RDS_SIGNATURE" in classes: + cur["signature"] = val + elif "RDS_STATUS" in classes: + cur["status"] = val + elif "RDS_LOCATION" in classes: + cur["location"] = val + + if cur: # append the last group if not followed by a space + groups.append(cur) + + # Find the signature for the entry whose location mentions "Semesterapparat" + for g in groups: + loc = g.get("location", "").lower() + if "semesterapparat" in loc: + signature = g.get("signature") + return Book( + title=title, + ppn=ppn, + signature=signature, + library_location=loc.split("-")[-1], + link=elink, + author=author, + edition=edition, + ) + else: + return Book( + title=title, + ppn=ppn, + signature=signature, + library_location=loc.split("\n\n")[-1], + link=elink, + author=author, + edition=edition, + ) + + def get(self, ppn: str) -> Book | None: + # based on PPN, get title, people, edition, year, language, pages, isbn, + link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}" + result = self.search(link) + soup = BeautifulSoup(result, "html.parser") + + def get_ppn(self, searchterm: str) -> str | None: + links = self.get_book_links(searchterm) + ppn = None + for link in links: + result = self.search(link) + soup = BeautifulSoup(result, "html.parser") + print(link) + ppn = link.split("/")[-1] + if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn): + return ppn + return ppn + + def get_semesterapparat_number(self, searchterm: str) -> int: + links = self.get_book_links(searchterm) + for link in links: + result = self.search(link) + # in result search for class col-xs-12 rds-dl RDS_LOCATION + # if found, return text of href + soup = BeautifulSoup(result, "html.parser") + + locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION") + for location_el in locations: + if "Semesterapparat-" in location_el.text: + match = regex.search(r"Semesterapparat-(\d+)", location_el.text) + if match: + return int(match.group(1)) + if "Handbibliothek-" in location_el.text: + return location_el.text.strip().split("\n\n")[-1].strip() + return location_el.text.strip().split("\n\n")[-1].strip() + return 0 + + def get_author(self, link: str) -> str: + links = self.get_book_links(f"kid:{link}") + author = None + for link in links: + # print(link) + result = self.search(link) + soup = BeautifulSoup(result, "html.parser") + # get all authors, return them as a string seperated by ; + authors = soup.find_all( + "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON" + ) + if authors: + # get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel + author_names = [] + for author in authors: + panel = author.find_next_sibling( + "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel" + ) + if panel: + links = panel.find_all("a") + for link in links: + 
author_names.append(link.text.strip()) + author = "; ".join(author_names) + return author + + def get_signature(self, isbn: str): + links = self.get_book_links(f"{isbn}") + signature = None + for link in links: + result = self.search(link) + soup = BeautifulSoup(result, "html.parser") + panel = soup.select_one("div.panel-body") + if panel: + # Collect the RDS_* blocks in order, using the 'space' divs as separators + groups = [] + cur = {} + for node in panel.select( + "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space" + ): + classes = node.get("class", []) + # Separator between entries + if "space" in classes: + if cur: + groups.append(cur) + cur = {} + continue + + # Read the value from the corresponding panel cell + val_el = node.select_one(".rds-dl-panel") + val = ( + val_el.get_text(" ", strip=True) + if val_el + else node.get_text(" ", strip=True) + ) + + if "RDS_SIGNATURE" in classes: + cur["signature"] = val + elif "RDS_STATUS" in classes: + cur["status"] = val + elif "RDS_LOCATION" in classes: + cur["location"] = val + + if cur: # append the last group if not followed by a space + groups.append(cur) + + # Find the signature for the entry whose location mentions "Semesterapparat" + for g in groups: + print(g) + loc = g.get("location", "").lower() + if "semesterapparat" in loc: + signature = g.get("signature") + return signature + else: + signature = g.get("signature") + return signature + print("No signature found") + return signature + + def in_library(self, ppn: str) -> bool: + if ppn is None: + return False + links = self.get_book_links(f"kid:{ppn}") + return len(links) > 0 + + def get_location(self, ppn: str) -> str | None: + if ppn is None: + return None + link = self.get_book(f"{ppn}") + if link is None: + return None + return link.library_location diff --git a/src/services/lehmanns.py b/src/services/lehmanns.py new file mode 100644 index 0000000..9640275 --- /dev/null +++ b/src/services/lehmanns.py @@ -0,0 +1,312 @@ +from __future__ import annotations + +import re +from dataclasses import asdict, dataclass, field +from typing import Iterable, List, Optional +from urllib.parse import quote_plus, urljoin + +import httpx +from bs4 import BeautifulSoup + +from src.core.models import BookData + +BASE = "https://www.lehmanns.de" +SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q=" + + +@dataclass +class LehmannsSearchResult: + title: str + url: str + + # Core fields from the listing card + year: Optional[int] = None + edition: Optional[int] = None + publisher: Optional[str] = None + isbn13: Optional[str] = None + + # Extras from the listing card + description: Optional[str] = None + authors: list[str] = field(default_factory=list) + media_type: Optional[str] = None + book_format: Optional[str] = None + price_eur: Optional[float] = None + currency: str = "EUR" + image: Optional[str] = None + + # From detail page: + pages: Optional[str] = None # " Seiten" + buyable: bool = True # set in enrich_pages (detail page) + unavailable_hint: Optional[str] = ( + None # e.g. 
"Titel ist leider vergriffen; keine Neuauflage" + ) + + def to_dict(self) -> dict: + return asdict(self) + + +class LehmannsClient: + """Scrapes quick-search results, then enriches (and filters) via product pages.""" + + def __init__(self, timeout: float = 20.0): + self.client = httpx.Client( + headers={ + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0 Safari/537.36" + ), + "Accept-Language": "de-DE,de;q=0.9,en;q=0.8", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + }, + timeout=timeout, + follow_redirects=True, + ) + + def close(self): + self.client.close() + + def __enter__(self): + return self + + def __exit__(self, *exc): + self.close() + + # ------------------- Search (listing) ------------------- + + def build_search_url(self, title: str) -> str: + # spaces -> '+' + return SEARCH_URL + quote_plus(title) + + def search_by_title( + self, + title: str, + limit: Optional[int] = None, + strict: bool = False, + only_latest: bool = True, + ) -> List[BookData]: + """ + Parse the listing page only (no availability check here). + Use enrich_pages(...) afterwards to fetch detail pages, add 'pages', + and drop unbuyable items. + """ + url = self.build_search_url(title=title) + html = self._get(url) + if not html: + return [] + results = self._parse_results(html) + self.enrich_pages(results) + + results = [BookData().from_LehmannsSearchResult(r) for r in results] + if strict: + # filter results to only those with exact title match (case-insensitive) + title_lower = title.lower() + results = [r for r in results if r.title and r.title.lower() == title_lower] + # results = [r for r in results if r.buyable] + return results + if limit is not None: + results = results[: max(0, limit)] + if only_latest and len(results) > 1: + # keep only the latest edition (highest edition number) + results.sort(key=lambda r: (r.edition_number or 0), reverse=True) + results = [results[0]] + return results + + # ------------------- Detail enrichment & filtering ------------------- + + def enrich_pages( + self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True + ) -> List[LehmannsSearchResult]: + """ + Fetch each result.url, extract: + - pages: from ... + - availability: from
the <li class="availability-3"> element:
+            * if it contains "Titel ist leider vergriffen", mark buyable=False
+            * if it also contains "keine Neuauflage", set unavailable_hint accordingly
+        If drop_unbuyable=True, exclude non-buyable results from the returned list.
+        """
+        enriched: List[LehmannsSearchResult] = []
+        for r in results:
+            try:
+                html = self._get(r.url)
+                if not html:
+                    # Can't verify; keep as-is when not dropping, else skip
+                    if not drop_unbuyable:
+                        enriched.append(r)
+                    continue
+
+                soup = BeautifulSoup(html, "html.parser")  # type: ignore
+
+                # Pages
+                pages_node = soup.select_one(  # type: ignore
+                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
+                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
+                    ".meta-seiten [itemprop='numberOfPages'], "
+                    ".meta-seiten[itemprop='numberOfPages'], "
+                    ".book-meta.meta-seiten"
+                )
+                if pages_node:
+                    text = pages_node.get_text(" ", strip=True)
+                    m = re.search(r"\d+", text)
+                    if m:
+                        r.pages = f"{m.group(0)} Seiten"
+
+                # Availability via li.availability-3
+                avail_li = soup.select_one("li.availability-3")  # type: ignore
+                if avail_li:
+                    avail_text = " ".join(
+                        avail_li.get_text(" ", strip=True).split()
+                    ).lower()
+                    if "titel ist leider vergriffen" in avail_text:
+                        r.buyable = False
+                        if "keine neuauflage" in avail_text:
+                            r.unavailable_hint = (
+                                "Titel ist leider vergriffen; keine Neuauflage"
+                            )
+                        else:
+                            r.unavailable_hint = "Titel ist leider vergriffen"
+
+                # Append or drop
+                if (not drop_unbuyable) or r.buyable:
+                    enriched.append(r)
+
+            except Exception:
+                # On any per-item error, keep the record if not dropping; else skip
+                if not drop_unbuyable:
+                    enriched.append(r)
+                continue
+
+        return enriched
+
+    # ------------------- Internals -------------------
+
+    def _get(self, url: str) -> Optional[str]:
+        try:
+            r = self.client.get(url)
+            r.encoding = "utf-8"
+            if r.status_code == 200 and "text/html" in (
+                r.headers.get("content-type") or ""
+            ):
+                return r.text
+        except httpx.HTTPError:
+            pass
+        return None
+
+    def _parse_results(self, html: str) -> List[LehmannsSearchResult]:
+        soup = BeautifulSoup(html, "html.parser")
+        results: list[LehmannsSearchResult] = []
+
+        for block in soup.select("div.info-block"):
+            a = block.select_one(".title a[href]")
+            if not a:
+                continue
+            url = urljoin(BASE, a["href"].strip())
+            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
+                strip=True
+            )
+
+            # Alternative headline => extend title
+            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
+            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
+            title = (
+                f"{base_title} : {alternative_headline}"
+                if alternative_headline
+                else base_title
+            )
+            description = alternative_headline
+
+            # Authors from .author
+            authors: list[str] = []
+            author_div = block.select_one("div.author")  # type: ignore
+            if author_div:
+                t = author_div.get_text(" ", strip=True)
+                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
+                for part in re.split(r"\s*;\s*|\s*&\s*|\s+und\s+", t):
+                    name = " ".join(part.split())
+                    if name:
+                        authors.append(name)
+
+            # Media + format
+            media_type = None
+            book_format = None
+            type_text = block.select_one(".type")  # type: ignore
+            if type_text:
+                t = type_text.get_text(" ", strip=True)
+                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
+                if m:
+                    media_type = m.group(1)
+                fm = re.search(r"\(([^)]+)\)", t)
+                if fm:
+                    book_format = fm.group(1).strip().upper()
+
+            # Year
+            year = None
+            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
+            if y:
+                try:
+                    year = 
int(y.get_text(strip=True)) + except ValueError: + pass + + # Edition + edition = None + ed = block.select_one("[itemprop='bookEdition']") # type: ignore + if ed: + m = re.search(r"\d+", ed.get_text(strip=True)) + if m: + edition = int(m.group()) + + # Publisher + publisher = None + pub = block.select_one( # type: ignore + ".publisherprop [itemprop='name']" + ) or block.select_one(".publisher [itemprop='name']") # type: ignore + if pub: + publisher = pub.get_text(strip=True) + + # ISBN-13 + isbn13 = None + isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']") # type: ignore + if isbn_tag: + digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True)) + m = re.search(r"(97[89]\d{10})", digits) + if m: + isbn13 = m.group(1) + + # Price (best effort) + price_eur = None + txt = block.get_text(" ", strip=True) + mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", txt) + if not mprice and block.parent: + sib = block.parent.get_text(" ", strip=True) + mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", sib) + if mprice: + num = mprice.group(1).replace(".", "").replace(",", ".") + try: + price_eur = float(num) + except ValueError: + pass + + # Image (best-effort) + image = None + left_img = block.find_previous("img") # type: ignore + if left_img and left_img.get("src"): + image = urljoin(BASE, left_img["src"]) + + results.append( + LehmannsSearchResult( + title=title, + url=url, + description=description, + authors=authors, + media_type=media_type, + book_format=book_format, + year=year, + edition=edition, + publisher=publisher, + isbn13=isbn13, + price_eur=price_eur, + image=image, + ) + ) + + return results diff --git a/src/services/openai.py b/src/services/openai.py new file mode 100644 index 0000000..715be68 --- /dev/null +++ b/src/services/openai.py @@ -0,0 +1,58 @@ +import json +from typing import Any + +from openai import OpenAI + +from src import settings + + +def init_client() -> OpenAI: + """Initialize the OpenAI client with the API key and model from settings.""" + global client, model, api_key + if not settings.openAI.api_key: + raise ValueError("OpenAI API key is not set in the configuration.") + if not settings.openAI.model: + raise ValueError("OpenAI model is not set in the configuration.") + + model = settings.openAI.model + api_key = settings.openAI.api_key + client = OpenAI(api_key=api_key) + return client + + +def run_shortener(title: str, length: int) -> list[dict[str, Any]]: + client = init_client() + response = client.responses.create( # type: ignore + model=model, + instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit. +You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit. +based on that, please reply only the shortened string. Give me 5 choices. if the length is too long, discard the string and try another one.Return the data as a python list containing the result as {"shortened_string": shortened_string, "length": lengthasInt}. Do not return the answer in a codeblock, use a pure string. 
Before answering, check the results and if ANY is longer than the needed_length, discard all and try again""", + input=f'{{"string":"{title}", "needed_length":{length}}}', + ) + answers = response.output_text + return eval(answers) # type: ignore + # answers are strings in json format, so we need to convert them to a list of dicts + + +def name_tester(name: str) -> dict: + client = init_client() + response = client.responses.create( # type: ignore + model=model, + instructions="""you are a name tester, You are given a name and will have to split the name into first name, last name, and if present the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOt return the answer in a codeblock, use a pure json string. Assume the names are in the usual german naming scheme""", + input=f'{{"name":"{name}"}}', + ) + answers = response.output_text + + return json.loads(answers) + + +def semester_converter(semester: str) -> str: + client = init_client() + response = client.responses.create( # type: ignore + model=model, + instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""", + input=semester, + ) + answers = response.output_text + + return answers diff --git a/src/services/sru.py b/src/services/sru.py new file mode 100644 index 0000000..9705995 --- /dev/null +++ b/src/services/sru.py @@ -0,0 +1,631 @@ +import re +import xml.etree.ElementTree as ET +from dataclasses import dataclass, field +from enum import Enum +from typing import Dict, Iterable, List, Optional, Tuple, Union + +import requests +from requests.adapters import HTTPAdapter + +# centralized logging used via src.shared.logging +from src.core.models import BookData +from src.shared.logging import log + +log # ensure imported logger is referenced + + +# ----------------------- +# Dataclasses +# ----------------------- + + +# --- MARC XML structures --- +@dataclass +class ControlField: + tag: str + value: str + + +@dataclass +class SubField: + code: str + value: str + + +@dataclass +class DataField: + tag: str + ind1: str = " " + ind2: str = " " + subfields: List[SubField] = field(default_factory=list) + + +@dataclass +class MarcRecord: + leader: str + controlfields: List[ControlField] = field(default_factory=list) + datafields: List[DataField] = field(default_factory=list) + + +# --- SRU record wrapper --- +@dataclass +class Record: + recordSchema: str + recordPacking: str + recordData: MarcRecord + recordPosition: int + + +@dataclass +class EchoedSearchRequest: + version: str + query: str + maximumRecords: int + recordPacking: str + recordSchema: str + + +@dataclass +class SearchRetrieveResponse: + version: str + numberOfRecords: int + records: List[Record] = field(default_factory=list) + echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None + + +# ----------------------- +# Parser +# ----------------------- + +ZS = "http://www.loc.gov/zing/srw/" +MARC = "http://www.loc.gov/MARC21/slim" +NS = {"zs": ZS, "marc": MARC} + + +def _text(elem: Optional[ET.Element]) -> str: + return (elem.text or "") if elem is not None else "" + + +def _req_text(parent: ET.Element, path: str) -> Optional[str]: + el = parent.find(path, NS) + if el is None or el.text is None: + return None + return el.text + + +def parse_marc_record(record_el: ET.Element) -> MarcRecord: + """ + record_el is the element (default ns MARC in your 
sample) + """ + # leader + leader_text = _req_text(record_el, "marc:leader") or "" + + # controlfields + controlfields: List[ControlField] = [] + for cf in record_el.findall("marc:controlfield", NS): + tag = cf.get("tag", "").strip() + controlfields.append(ControlField(tag=tag, value=_text(cf))) + + # datafields + datafields: List[DataField] = [] + for df in record_el.findall("marc:datafield", NS): + tag = df.get("tag", "").strip() + ind1 = df.get("ind1") or " " + ind2 = df.get("ind2") or " " + subfields: List[SubField] = [] + for sf in df.findall("marc:subfield", NS): + code = sf.get("code", "") + subfields.append(SubField(code=code, value=_text(sf))) + datafields.append(DataField(tag=tag, ind1=ind1, ind2=ind2, subfields=subfields)) + + return MarcRecord( + leader=leader_text, controlfields=controlfields, datafields=datafields + ) + + +def parse_record(zs_record_el: ET.Element) -> Record: + recordSchema = _req_text(zs_record_el, "zs:recordSchema") or "" + recordPacking = _req_text(zs_record_el, "zs:recordPacking") or "" + + # recordData contains a MARC with default MARC namespace in your sample + recordData_el = zs_record_el.find("zs:recordData", NS) + if recordData_el is None: + raise ValueError("Missing zs:recordData") + + marc_record_el = recordData_el.find("marc:record", NS) + if marc_record_el is None: + # If the MARC record uses default ns (xmlns="...") ElementTree still needs the ns-qualified name + # We already searched with prefix; this covers both default and prefixed cases. + raise ValueError("Missing MARC21 record inside zs:recordData") + + marc_record = parse_marc_record(marc_record_el) + + recordPosition = int(_req_text(zs_record_el, "zs:recordPosition") or "0") + return Record( + recordSchema=recordSchema, + recordPacking=recordPacking, + recordData=marc_record, + recordPosition=recordPosition, + ) + + +def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]: + el = root.find("zs:echoedSearchRetrieveRequest", NS) + if el is None: + return None + + # Be permissive with missing fields + version = _text(el.find("zs:version", NS)) + query = _text(el.find("zs:query", NS)) + maximumRecords_text = _text(el.find("zs:maximumRecords", NS)) or "0" + recordPacking = _text(el.find("zs:recordPacking", NS)) + recordSchema = _text(el.find("zs:recordSchema", NS)) + + try: + maximumRecords = int(maximumRecords_text) + except ValueError: + maximumRecords = 0 + + return EchoedSearchRequest( + version=version, + query=query, + maximumRecords=maximumRecords, + recordPacking=recordPacking, + recordSchema=recordSchema, + ) + + +def parse_search_retrieve_response( + xml_str: Union[str, bytes], +) -> SearchRetrieveResponse: + root = ET.fromstring(xml_str) + + # Root is zs:searchRetrieveResponse + version = _req_text(root, "zs:version") + numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0") + + records_parent = root.find("zs:records", NS) + records: List[Record] = [] + if records_parent is not None: + for r in records_parent.findall("zs:record", NS): + records.append(parse_record(r)) + + echoed = parse_echoed_request(root) + + return SearchRetrieveResponse( + version=version, + numberOfRecords=numberOfRecords, + records=records, + echoedSearchRetrieveRequest=echoed, + ) + + +# --- Query helpers over MarcRecord --- + + +def iter_datafields( + rec: MarcRecord, + tag: Optional[str] = None, + ind1: Optional[str] = None, + ind2: Optional[str] = None, +) -> Iterable[DataField]: + """Yield datafields, optionally filtered by tag/indicators.""" + for df in rec.datafields: + 
if tag is not None and df.tag != tag: + continue + if ind1 is not None and df.ind1 != ind1: + continue + if ind2 is not None and df.ind2 != ind2: + continue + yield df + + +def subfield_values( + rec: MarcRecord, + tag: str, + code: str, + *, + ind1: Optional[str] = None, + ind2: Optional[str] = None, +) -> List[str]: + """All values for subfield `code` in every `tag` field (respecting indicators).""" + out: List[str] = [] + for df in iter_datafields(rec, tag, ind1, ind2): + out.extend(sf.value for sf in df.subfields if sf.code == code) + return out + + +def first_subfield_value( + rec: MarcRecord, + tag: str, + code: str, + *, + ind1: Optional[str] = None, + ind2: Optional[str] = None, + default: Optional[str] = None, +) -> Optional[str]: + """First value for subfield `code` in `tag` (respecting indicators).""" + for df in iter_datafields(rec, tag, ind1, ind2): + for sf in df.subfields: + if sf.code == code: + return sf.value + return default + + +def find_datafields_with_subfields( + rec: MarcRecord, + tag: str, + *, + where_all: Optional[Dict[str, str]] = None, + where_any: Optional[Dict[str, str]] = None, + casefold: bool = False, + ind1: Optional[str] = None, + ind2: Optional[str] = None, +) -> List[DataField]: + """ + Return datafields of `tag` whose subfields match constraints: + - where_all: every (code -> exact value) must be present + - where_any: at least one (code -> exact value) present + Set `casefold=True` for case-insensitive comparison. + """ + where_all = where_all or {} + where_any = where_any or {} + matched: List[DataField] = [] + + for df in iter_datafields(rec, tag, ind1, ind2): + # Map code -> list of values (with optional casefold applied) + vals: Dict[str, List[str]] = {} + for sf in df.subfields: + v = sf.value.casefold() if casefold else sf.value + vals.setdefault(sf.code, []).append(v) + + ok = True + for c, v in where_all.items(): + vv = v.casefold() if casefold else v + if c not in vals or vv not in vals[c]: + ok = False + break + + if ok and where_any: + any_ok = any( + (c in vals) and ((v.casefold() if casefold else v) in vals[c]) + for c, v in where_any.items() + ) + if not any_ok: + ok = False + + if ok: + matched.append(df) + + return matched + + +def controlfield_value( + rec: MarcRecord, tag: str, default: Optional[str] = None +) -> Optional[str]: + """Get the first controlfield value by tag (e.g., '001', '005').""" + for cf in rec.controlfields: + if cf.tag == tag: + return cf.value + return default + + +def datafields_value( + data: List[DataField], code: str, default: Optional[str] = None +) -> Optional[str]: + """Get the first value for a specific subfield code in a list of datafields.""" + for df in data: + for sf in df.subfields: + if sf.code == code: + return sf.value + return default + + +def datafield_value( + df: DataField, code: str, default: Optional[str] = None +) -> Optional[str]: + """Get the first value for a specific subfield code in a datafield.""" + for sf in df.subfields: + if sf.code == code: + return sf.value + return default + + +def _smart_join_title(a: str, b: Optional[str]) -> str: + """ + Join 245 $a and $b with MARC-style punctuation. + If $b is present, join with ' : ' unless either side already supplies punctuation. 
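+
+    Examples (sketch):
+        _smart_join_title("Grundlagen", "eine Einführung")   -> "Grundlagen : eine Einführung"
+        _smart_join_title("Grundlagen /", "eine Einführung") -> "Grundlagen / eine Einführung"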
+ """ + a = a.strip() + if not b: + return a + b = b.strip() + if a.endswith((":", ";", "/")) or b.startswith((":", ";", "/")): + return f"{a} {b}" + return f"{a} : {b}" + + +def subfield_values_from_fields( + fields: Iterable[DataField], + code: str, +) -> List[str]: + """All subfield values with given `code` across a list of DataField.""" + return [sf.value for df in fields for sf in df.subfields if sf.code == code] + + +def first_subfield_value_from_fields( + fields: Iterable[DataField], + code: str, + default: Optional[str] = None, +) -> Optional[str]: + """First subfield value with given `code` across a list of DataField.""" + for df in fields: + for sf in df.subfields: + if sf.code == code: + return sf.value + return default + + +def subfield_value_pairs_from_fields( + fields: Iterable[DataField], + code: str, +) -> List[Tuple[DataField, str]]: + """ + Return (DataField, value) pairs for all subfields with `code`. + Useful if you need to know which field a value came from. + """ + out: List[Tuple[DataField, str]] = [] + for df in fields: + for sf in df.subfields: + if sf.code == code: + out.append((df, sf.value)) + return out + + +def book_from_marc(rec: MarcRecord) -> BookData: + # PPN from controlfield 001 + ppn = controlfield_value(rec, "001") + + # Title = 245 $a + 245 $b (if present) + t_a = first_subfield_value(rec, "245", "a") + t_b = first_subfield_value(rec, "245", "b") + title = _smart_join_title(t_a, t_b) if t_a else None + + # Signature = 924 where $9 == "Frei 129" → take that field's $g + frei_fields = find_datafields_with_subfields( + rec, "924", where_all={"9": "Frei 129"} + ) + signature = first_subfield_value_from_fields(frei_fields, "g") + + # Year = 264 $c (prefer ind2="1" publication; fallback to any 264) + year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value( + rec, "264", "c" + ) + isbn = subfield_values(rec, "020", "a") + mediatype = first_subfield_value(rec, "338", "a") + lang = subfield_values(rec, "041", "a") + authors = subfield_values(rec, "700", "a") + author = None + if authors: + author = "; ".join(authors) + + return BookData( + ppn=ppn, + title=title, + signature=signature, + edition=first_subfield_value(rec, "250", "a") or "", + year=year, + pages=first_subfield_value(rec, "300", "a") or "", + publisher=first_subfield_value(rec, "264", "b") or "", + isbn=isbn, + language=lang, + link="", + author=author, + media_type=mediatype, + ) + + +class SWBData(Enum): + URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml" + ARGSCHEMA = "pica." + NAME = "SWB" + + +class DNBData(Enum): + URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml" + ARGSCHEMA = "" + NAME = "DNB" + + +class SRUSite(Enum): + SWB = SWBData + DNB = DNBData + + +RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative char set typically seen in RVK notations + + +def find_newer_edition( + swb_result: BookData, dnb_result: List[BookData] +) -> Optional[List[BookData]]: + """ + New edition if: + - year > swb.year OR + - edition_number > swb.edition_number + + Additional guards & preferences: + - If both have signatures and they differ, skip (not the same work). + - For duplicates (same ppn): keep the one that has a signature, and + prefer a signature that matches swb_result.signature. + - If multiple remain: keep the single 'latest' by (year desc, + edition_number desc, best-signature-match desc, has-signature desc). 
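+
+    Example (sketch): for a SWB record from 2018, 2nd edition, DNB candidates
+    from 2018/2nd ed. and 2021/3rd ed. with the same signature reduce to
+    [the 2021 record]; with no strictly newer candidate the result is None.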
+ """ + + def norm_sig(s: Optional[str]) -> str: + if not s: + return "" + # normalize: lowercase, collapse whitespace, keep alnum + a few separators + s = s.lower() + s = re.sub(r"\s+", " ", s).strip() + # remove obvious noise; adjust if your signature format differs + s = re.sub(r"[^a-z0-9\-_/\. ]+", "", s) + return s + + def has_sig(b: BookData) -> bool: + return bool(getattr(b, "signature", None)) + + def sig_matches_swb(b: BookData) -> bool: + if not has_sig(b) or not has_sig(swb_result): + return False + return norm_sig(b.signature) == norm_sig(swb_result.signature) + + def strictly_newer(b: BookData) -> bool: + by_year = ( + b.year is not None + and swb_result.year is not None + and b.year > swb_result.year + ) + by_edition = ( + b.edition_number is not None + and swb_result.edition_number is not None + and b.edition_number > swb_result.edition_number + ) + return by_year or by_edition + + swb_sig_norm = norm_sig(getattr(swb_result, "signature", None)) + + # 1) Filter to same-work AND newer + candidates: List[BookData] = [] + for b in dnb_result: + # Skip if both signatures exist and don't match (different work) + b_sig = getattr(b, "signature", None) + if b_sig and swb_result.signature: + if norm_sig(b_sig) != swb_sig_norm: + continue # not the same work + + # Keep only if newer by rules + if strictly_newer(b): + candidates.append(b) + + if not candidates: + return None + + # 2) Dedupe by PPN, preferring signature (and matching signature if possible) + by_ppn: dict[Optional[str], BookData] = {} + for b in candidates: + key = getattr(b, "ppn", None) + prev = by_ppn.get(key) + if prev is None: + by_ppn[key] = b + continue + + # Compute preference score for both + def ppn_pref_score(x: BookData) -> tuple[int, int]: + # (signature matches swb, has signature) + return (1 if sig_matches_swb(x) else 0, 1 if has_sig(x) else 0) + + if ppn_pref_score(b) > ppn_pref_score(prev): + by_ppn[key] = b + + deduped = list(by_ppn.values()) + if not deduped: + return None + + # 3) If multiple remain, keep only the latest one. 
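+    # (sort_key tuples compare lexicographically, so as a sketch: (2021, 3, 1, 0)
+    # beats (2021, 2, 1, 1) -- year and edition dominate the signature flags.)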
+ # Order: year desc, edition_number desc, signature-match desc, has-signature desc + def sort_key(b: BookData): + year = b.year if b.year is not None else -1 + ed = b.edition_number if b.edition_number is not None else -1 + sig_match = 1 if sig_matches_swb(b) else 0 + sig_present = 1 if has_sig(b) else 0 + return (year, ed, sig_match, sig_present) + + best = max(deduped, key=sort_key) + return [best] if best else None + + +class Api: + def __init__(self, site: str, url: str, prefix: str): + self.site = site + self.url = url + self.prefix = prefix + # Reuse TCP connections across requests for better performance + self._session = requests.Session() + # Slightly larger connection pool for concurrent calls + adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20) + self._session.mount("http://", adapter) + self._session.mount("https://", adapter) + + def close(self): + try: + self._session.close() + except Exception: + pass + + def __del__(self): + # Best-effort cleanup + self.close() + + def get(self, query_args: Iterable[str]) -> List[Record]: + # if any query_arg ends with =, remove it + if self.site == "DNB": + args = [arg for arg in query_args if not arg.startswith("pica.")] + if args == []: + raise ValueError("DNB queries must include at least one search term") + query_args = args + # query_args = [f"{self.prefix}{arg}" for arg in query_args] + query = "+and+".join(query_args) + query = query.replace(" ", "%20").replace("&", "%26") + # query_args = [arg for arg in query_args if not arg.endswith("=")] + # query = "+and+".join(query_args) + # query = query.replace(" ", "%20").replace("&", "%26") + # insert the query into the url url is + url = self.url.format(query) + + log.debug(url) + headers = { + "User-Agent": f"{self.site} SRU Client, ", + "Accept": "application/xml", + "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3", + } + # Use persistent session and set timeouts to avoid hanging + resp = self._session.get(url, headers=headers, timeout=(3.05, 60)) + if resp.status_code != 200: + raise Exception(f"Error fetching data from SWB: {resp.status_code}") + # Parse using raw bytes (original behavior) to preserve encoding edge cases + sr = parse_search_retrieve_response(resp.content) + return sr.records + + def getBooks(self, query_args: Iterable[str]) -> List[BookData]: + records: List[Record] = self.get(query_args) + # Avoid printing on hot paths; rely on logger if needed + log.debug(f"{self.site} found {len(records)} records for args={query_args}") + books: List[BookData] = [] + # extract title from query_args if present + title = None + for arg in query_args: + if arg.startswith("pica.tit="): + title = arg.split("=")[1] + break + for rec in records: + book = book_from_marc(rec.recordData) + books.append(book) + if title: + books = [ + b + for b in books + if b.title and b.title.lower().startswith(title.lower()) + ] + return books + + def getLinkForBook(self, book: BookData) -> str: + # Not implemented: depends on catalog front-end; return empty string for now + return "" + + +class SWB(Api): + def __init__(self): + self.site = SWBData.NAME.value + self.url = SWBData.URL.value + self.prefix = SWBData.ARGSCHEMA.value + super().__init__(self.site, self.url, self.prefix) diff --git a/src/services/webadis.py b/src/services/webadis.py new file mode 100644 index 0000000..af79650 --- /dev/null +++ b/src/services/webadis.py @@ -0,0 +1,35 @@ +from playwright.sync_api import sync_playwright + + +def get_book_medianr(signature: str, semesterapparat_nr: int, auth: tuple) -> str: + with 
sync_playwright() as playwright: + browser = playwright.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + page.goto( + "https://bsz.ibs-bw.de:22998/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SDAP42" + ) + page.get_by_role("textbox", name="Benutzer").fill(auth[0]) + page.get_by_role("textbox", name="Benutzer").press("Tab") + page.get_by_role("textbox", name="Kennwort").fill(auth[1]) + page.get_by_role("textbox", name="Kennwort").press("Enter") + page.get_by_role("button", name="Katalog").click() + page.get_by_role("textbox", name="Signatur").click() + page.get_by_role("textbox", name="Signatur").fill(signature) + page.get_by_role("textbox", name="Signatur").press("Enter") + book_list = page.locator("iframe").content_frame.get_by_role( + "cell", name="Bibliothek der Pädagogischen" + ) + # this will always find one result, we need to split the resulting text based on the entries that start with "* " + book_entries = book_list.inner_text().split("\n") + books = [] + for entry in book_entries: + if entry.startswith("* "): + books.append(entry) + for book in books: + if f"Semesterapparat: {semesterapparat_nr}" in book: + return book.split("* ")[1].split(":")[0] + + # --------------------- + context.close() + browser.close() diff --git a/src/services/webrequest.py b/src/services/webrequest.py new file mode 100644 index 0000000..947a8ba --- /dev/null +++ b/src/services/webrequest.py @@ -0,0 +1,314 @@ +from enum import Enum +from typing import Any, Optional, Union + +import requests +from bs4 import BeautifulSoup + +# import sleep_and_retry decorator to retry requests +from ratelimit import limits, sleep_and_retry + +from src.core.models import BookData +from src.shared.logging import log +from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData +from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA + +# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO") + + +API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/" +PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND" +BASE = "https://rds.ibs-bw.de" +# +TITLE = "RDS_TITLE" +SIGNATURE = "RDS_SIGNATURE" +EDITION = "RDS_EDITION" +ISBN = "RDS_ISBN" +AUTHOR = "RDS_PERSON" + +HEADERS = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ + (HTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36", + "Accept-Language": "en-US, en;q=0.5", +} +RATE_LIMIT = 20 +RATE_PERIOD = 30 + + +class TransformerType(Enum): + ARRAY = "ARRAY" + COinS = "COinS" + BibTeX = "BibTeX" + RIS = "RIS" + RDS = "RDS" + + +class WebRequest: + def __init__(self) -> None: + """Request data from the web, and format it depending on the mode.""" + self.apparat = None + self.use_any = False # use any book that 
matches the search term + self.signature = None + self.ppn = None + self.data = None + self.timeout = 5 + log.info("Initialized WebRequest") + + @property + def use_any_book(self): + """use any book that matches the search term""" + self.use_any = True + log.info("Using any book") + return self + + def set_apparat(self, apparat: int) -> "WebRequest": + self.apparat = apparat + if int(self.apparat) < 10: + self.apparat = f"0{self.apparat}" + log.info(f"Set apparat to {self.apparat}") + return self + + def get_ppn(self, signature: str) -> "WebRequest": + self.signature = signature + if "+" in signature: + signature = signature.replace("+", "%2B") + if "doi.org" in signature: + signature = signature.split("/")[-1] + self.ppn = signature + return self + + @sleep_and_retry + @limits(calls=RATE_LIMIT, period=RATE_PERIOD) + def search_book(self, searchterm: str) -> str: + response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout) + return response.text + + @sleep_and_retry + @limits(calls=RATE_LIMIT, period=RATE_PERIOD) + def search_ppn(self, ppn: str) -> str: + response = requests.get(API_URL.format(ppn), timeout=self.timeout) + return response.text + + def get_book_links(self, searchterm: str) -> list[str]: + response: str = self.search_book(searchterm) # type:ignore + soup = BeautifulSoup(response, "html.parser") + links = soup.find_all("a", class_="title getFull") + res: list[str] = [] + for link in links: + res.append(BASE + link["href"]) + return res + + @sleep_and_retry + @limits(calls=RATE_LIMIT, period=RATE_PERIOD) + def search(self, link: str) -> Optional[str]: + try: + response = requests.get(link, timeout=self.timeout) + return response.text + except requests.exceptions.RequestException as e: + log.error(f"Request failed: {e}") + return None + + def get_data(self) -> Optional[list[str]]: + links = self.get_book_links(self.ppn) + log.debug(f"Links: {links}") + return_data: list[str] = [] + for link in links: + result: str = self.search(link) # type:ignore + # in result search for class col-xs-12 rds-dl RDS_LOCATION + # if found, return text of href + soup = BeautifulSoup(result, "html.parser") + locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION") + if locations: + for location in locations: + if "1. OG Semesterapparat" in location.text: + log.success("Found Semesterapparat, adding entry") + pre_tag = soup.find_all("pre") + return_data = [] + if pre_tag: + for tag in pre_tag: + data = tag.text.strip() + return_data.append(data) + return return_data + else: + log.error("No
<pre> tag found")
    +                            return return_data
    +                    else:
    +                        item_location = location.find(
    +                            "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
    +                        ).text.strip()
    +                        log.debug(f"Item location: {item_location}")
    +                        if self.use_any:
    +                            pre_tag = soup.find_all("pre")
    +                            if pre_tag:
    +                                for tag in pre_tag:
    +                                    data = tag.text.strip()
    +                                    return_data.append(data)
    +                                return return_data
    +                            else:
+                                log.error("No <pre> tag found")
+                                raise ValueError("No <pre> tag found")
    +                        elif f"Semesterapparat-{self.apparat}" in item_location:
    +                            pre_tag = soup.find_all("pre")
    +                            return_data = []
    +                            if pre_tag:
    +                                for tag in pre_tag:
    +                                    data = tag.text.strip()
    +                                    return_data.append(data)
    +                                return return_data
    +                            else:
+                                log.error("No <pre> tag found")
    +                                return return_data
    +                        else:
    +                            log.error(
    +                                f"Signature {self.signature} not found in {item_location}"
    +                            )
    +
    +        return return_data
    +
+    def get_data_elsa(self) -> Optional[list[str]]:
+        links = self.get_book_links(self.ppn)
+        for link in links:
+            result = self.search(link)
+            if result is None:
+                continue
+            # look for the location panels; any location is accepted here
+            soup = BeautifulSoup(result, "html.parser")
+            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
+            if locations:
+                pre_tag = soup.find_all("pre")
+                return_data = []
+                if pre_tag:
+                    for tag in pre_tag:
+                        return_data.append(tag.text.strip())
+                    return return_data
+                log.error("No <pre> tag found")
+                return return_data
+        return None
    +
    +
    +class BibTextTransformer:
+    """Transform data from the web into a BibTeX-style record.
+
+    Valid modes are ARRAY, COinS, BibTeX, RIS, and RDS.
+
+    Raises:
+        ValueError: Raised if mode is not in valid_modes.
+    """
    +
    +    valid_modes = [
    +        TransformerType.ARRAY,
    +        TransformerType.COinS,
    +        TransformerType.BibTeX,
    +        TransformerType.RIS,
    +        TransformerType.RDS,
    +    ]
    +
+    def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
+        # validate first, so an invalid mode raises ValueError instead of
+        # AttributeError when .value is accessed
+        if mode not in self.valid_modes:
+            log.error(f"Mode {mode} not valid")
+            raise ValueError(f"Mode {mode} not valid")
+        self.mode = mode.value
+        self.field = None
+        self.signature = None
+        self.data = None
    +
    +    def use_signature(self, signature: str) -> "BibTextTransformer":
+        """Use the exact signature to search for the book."""
    +        self.signature = signature
    +        return self
    +
    +    def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer":
    +        RIS_IDENT = "TY  -"
    +        ARRAY_IDENT = "[kid]"
    +        COinS_IDENT = "ctx_ver"
    +        BIBTEX_IDENT = "@book"
    +        RDS_IDENT = "RDS ---------------------------------- "
    +
    +        if data is None:
    +            self.data = None
    +            return self
    +
    +        if self.mode == "RIS":
    +            for line in data:
    +                if RIS_IDENT in line:
    +                    self.data = line
    +        elif self.mode == "ARRAY":
    +            for line in data:
    +                if ARRAY_IDENT in line:
    +                    self.data = line
    +        elif self.mode == "COinS":
    +            for line in data:
    +                if COinS_IDENT in line:
    +                    self.data = line
    +        elif self.mode == "BibTeX":
    +            for line in data:
    +                if BIBTEX_IDENT in line:
    +                    self.data = line
    +        elif self.mode == "RDS":
    +            for line in data:
    +                if RDS_IDENT in line:
    +                    self.data = line
    +        return self
    +
    +    def return_data(
    +        self, option: Any = None
    +    ) -> Union[
    +        Optional[BookData],
    +        Optional[RDS_GENERIC_DATA],
    +        Optional[RDS_AVAIL_DATA],
    +        None,
    +        dict[str, Union[RDS_AVAIL_DATA, RDS_GENERIC_DATA]],
    +    ]:
+        """Return the transformed data to the caller.
+
+        Args:
+            option (str, optional): Selects the RDS payload, as RDS responses
+                come in two flavours: use "rds_availability" or "rds_data".
+                Anything else yields a dict with both. Defaults to None.
+
+        Returns:
+            The transformed record: a BookData dataclass, an RDS payload, a
+            dict of both RDS payloads, or None if no data was parsed.
+        """
    +        if self.data is None:
    +            return None
    +        match self.mode:
    +            case "ARRAY":
    +                return ARRAYData(self.signature).transform(self.data)
    +            case "COinS":
    +                return COinSData().transform(self.data)
    +            case "BibTeX":
    +                return BibTeXData().transform(self.data)
    +            case "RIS":
    +                return RISData().transform(self.data)
    +            case "RDS":
    +                return RDSData().transform(self.data).return_data(option)
    +            case _:
    +                return None
    +
    +
+def cover(isbn: str) -> bytes:
+    """Fetch the medium-size cover image for an ISBN from buchhandel.de."""
+    test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
+    data = requests.get(test_url, stream=True, timeout=10)
+    return data.content
    +
    +
+def get_content(soup, css_class):
+    """Return the stripped text of the first div matching the given CSS class."""
+    return soup.find("div", class_=css_class).text.strip()
    +
    +
+if __name__ == "__main__":
+    signature = "CU 8500 K64"
+    data = WebRequest().set_apparat(71).get_ppn(signature).get_data()
+    book = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
+    log.debug(book)
    diff --git a/src/services/zotero.py b/src/services/zotero.py
    new file mode 100644
    index 0000000..6ca2588
    --- /dev/null
    +++ b/src/services/zotero.py
    @@ -0,0 +1,340 @@
    +from dataclasses import dataclass
    +from typing import Optional
    +
    +from pyzotero import zotero
    +
    +from src import settings
    +from src.services.webrequest import BibTextTransformer, WebRequest
    +from src.shared.logging import log
    +
    +
    +@dataclass
    +class Creator:
+    firstName: Optional[str] = None
+    lastName: Optional[str] = None
    +    creatorType: str = "author"
    +
    +    def from_dict(self, data: dict) -> None:
    +        for key, value in data.items():
    +            setattr(self, key, value)
    +
+    def from_string(self, data: str) -> "Creator":
+        if "," in data:
+            last, first = data.split(",", 1)
+            self.firstName = first.strip()
+            self.lastName = last.strip()
+        return self
    +
+    # Creator instances are serialized via __dict__ before being sent to Zotero.
    +
    +
    +@dataclass
    +class Book:
    +    itemType: str = "book"
    +    creators: list[Creator] = None
    +    tags: list = None
    +    collections: list = None
    +    relations: dict = None
    +    title: str = None
    +    abstractNote: str = None
    +    series: str = None
    +    seriesNumber: str = None
    +    volume: str = None
    +    numberOfVolumes: str = None
    +    edition: str = None
    +    place: str = None
    +    publisher: str = None
    +    date: str = None
    +    numPages: str = None
    +    language: str = None
    +    ISBN: str = None
    +    shortTitle: str = None
    +    url: str = None
    +    accessDate: str = None
    +    archive: str = None
    +    archiveLocation: str = None
    +    libraryCatalog: str = None
    +    callNumber: str = None
    +    rights: str = None
    +    extra: str = None
    +
    +    def to_dict(self) -> dict:
    +        ret = {}
    +        for key, value in self.__dict__.items():
    +            if value:
    +                ret[key] = value
    +        return ret
    +
    +
    +@dataclass
    +class BookSection:
    +    itemType: str = "bookSection"
    +    title: str = None
    +    creators: list[Creator] = None
    +    abstractNote: str = None
    +    bookTitle: str = None
    +    series: str = None
    +    seriesNumber: str = None
    +    volume: str = None
    +    numberOfVolumes: str = None
    +    edition: str = None
    +    place: str = None
    +    publisher: str = None
    +    date: str = None
    +    pages: str = None
    +    language: str = None
    +    ISBN: str = None
    +    shortTitle: str = None
    +    url: str = None
    +    accessDate: str = None
    +    archive: str = None
    +    archiveLocation: str = None
    +    libraryCatalog: str = None
    +    callNumber: str = None
    +    rights: str = None
    +    extra: str = None
+    tags: Optional[list] = None
+    collections: Optional[list] = None
+    relations: Optional[dict] = None
    +
    +    def to_dict(self) -> dict:
    +        ret = {}
    +        for key, value in self.__dict__.items():
    +            if value:
    +                ret[key] = value
    +        return ret
    +
    +    def assign(self, book) -> None:
    +        for key, value in book.__dict__.items():
    +            if key in self.__dict__.keys():
    +                try:
    +                    setattr(self, key, value)
    +                except AttributeError:
    +                    pass
    +
    +
    +@dataclass
    +class JournalArticle:
    +    itemType = "journalArticle"
    +    title: str = None
    +    creators: list[Creator] = None
    +    abstractNote: str = None
    +    publicationTitle: str = None
    +    volume: str = None
    +    issue: str = None
    +    pages: str = None
    +    date: str = None
    +    series: str = None
    +    seriesTitle: str = None
    +    seriesText: str = None
    +    journalAbbreviation: str = None
    +    language: str = None
    +    DOI: str = None
    +    ISSN: str = None
    +    shortTitle: str = None
    +    url: str = None
    +    accessDate: str = None
    +    archive: str = None
    +    archiveLocation: str = None
    +    libraryCatalog: str = None
    +    callNumber: str = None
    +    rights: str = None
    +    extra: str = None
+    tags: Optional[list] = None
+    collections: Optional[list] = None
+    relations: Optional[dict] = None
    +
    +    def to_dict(self) -> dict:
    +        ret = {}
    +        for key, value in self.__dict__.items():
    +            if value:
    +                ret[key] = value
    +        return ret
    +
+    def assign(self, book) -> None:
    +        for key, value in book.__dict__.items():
    +            if key in self.__dict__.keys():
    +                try:
    +                    setattr(self, key, value)
    +                except AttributeError:
    +                    pass
    +
    +
    +class ZoteroController:
    +    zoterocfg = settings.zotero
    +
+    def __init__(self):
+        if self.zoterocfg.library_id is None:
+            log.warning("Zotero library_id not configured; controller is disabled")
+            return
    +        self.zot = zotero.Zotero(  # type: ignore
    +            self.zoterocfg.library_id,
    +            self.zoterocfg.library_type,
    +            self.zoterocfg.api_key,
    +        )
    +
    +    def get_books(self) -> list:
    +        ret = []
    +        items = self.zot.top()  # type: ignore
    +        for item in items:
    +            if item["data"]["itemType"] == "book":
    +                ret.append(item)
    +        return ret
    +
+    def __get_data(self, isbn):
+        """Fetch a record via WebRequest and transform it into book data."""
+        web = WebRequest()
+        web.get_ppn(isbn)
+        data = web.get_data_elsa()
+        bib = BibTextTransformer()
+        bib.get_data(data)
+        return bib.return_data()
    +
    +    def createBook(self, isbn) -> Book:
    +        book = self.__get_data(isbn)
    +
    +        bookdata = Book()
    +        bookdata.title = book.title.split(":")[0]
    +        bookdata.ISBN = book.isbn
    +        bookdata.language = book.language
    +        bookdata.date = book.year
    +        bookdata.publisher = book.publisher
    +        bookdata.url = book.link
    +        bookdata.edition = book.edition
    +        bookdata.place = book.place
    +        bookdata.numPages = book.pages
    +        authors = [
    +            Creator().from_string(author).__dict__ for author in book.author.split(";")
    +        ]
    +        authors = [author for author in authors if author["lastName"] is not None]
    +        bookdata.creators = authors
    +        return bookdata
    +
    +    def createItem(self, item) -> Optional[str]:
    +        resp = self.zot.create_items([item])  # type: ignore
    +        if "successful" in resp.keys():
    +            log.debug(resp)
    +            return resp["successful"]["0"]["key"]
    +        else:
    +            return None
    +
    +    def deleteItem(self, key) -> None:
    +        items = self.zot.items()
    +        for item in items:
    +            if item["key"] == key:
    +                self.zot.delete_item(item)  # type: ignore
    +                break
    +
    +    def createHGSection(self, book: Book, data: dict) -> Optional[str]:
    +        log.debug(book)
    +        chapter = BookSection()
    +        chapter.assign(book)
    +        chapter.pages = data["pages"]
    +        chapter.itemType = "bookSection"
    +        chapter.ISBN = ""
    +        chapter.url = ""
    +        chapter.title = data["chapter_title"]
    +        creators = chapter.creators
    +        for creator in creators:
    +            creator["creatorType"] = "editor"
    +        chapter.creators = creators
    +        authors = [
    +            Creator().from_string(author).__dict__
    +            for author in data["section_author"].split(";")
    +        ]
    +        chapter.creators += authors
    +
    +        log.debug(chapter.to_dict())
    +        return self.createItem(chapter.to_dict())
    +
    +    def createBookSection(self, book: Book, data: dict) -> Optional[str]:
    +        chapter = BookSection()
    +        chapter.assign(book)
    +        chapter.pages = data["pages"]
    +        chapter.itemType = "bookSection"
    +        chapter.ISBN = ""
    +        chapter.url = ""
    +        chapter.title = ""
    +        return self.createItem(chapter.to_dict())
    +
    +    def createJournalArticle(self, journal, article) -> Optional[str]:
    +        journalarticle = JournalArticle()
    +        journalarticle.assign(journal)
    +        journalarticle.itemType = "journalArticle"
    +        journalarticle.creators = [
    +            Creator().from_string(author).__dict__
    +            for author in article["section_author"].split(";")
    +        ]
    +        journalarticle.date = article["year"]
    +        journalarticle.title = article["chapter_title"]
    +        journalarticle.publicationTitle = article["work_title"].split(":")[0].strip()
    +        journalarticle.pages = article["pages"]
    +        journalarticle.ISSN = article["isbn"]
    +        journalarticle.issue = article["issue"]
    +        journalarticle.url = article["isbn"]
+
    +        return self.createItem(journalarticle.to_dict())
    +
    +    def get_citation(self, item) -> str:
    +        title = self.zot.item(  # type: ignore
    +            item,
    +            content="bib",
    +            style="deutsche-gesellschaft-fur-psychologie",
    +        )[0]
+        title = (
+            title.replace('<div class="csl-bib-body" style="line-height: 1.35; ">', "")
+            .replace('<div class="csl-entry">', "")
+            .replace("</div>", "")
+            .replace("\n
    ", "") + .replace("&", "&") + ) + return title + + +if __name__ == "__main__": + zot = ZoteroController() + book = zot.createBook("DV 3000 D649 (4)") + row = "Döbert, Hans & Hörner, Wolfgang & Kopp, Bortho von & Reuter, Lutz R." + zot.createBookSection() + + # book = Book() + # # # book. + # ISBN = "9783801718718" + # book = createBook(isbn=ISBN) + # chapter = BookSection() + # chapter.title = "Geistige Behinderung" + # chapter.bookTitle = book.title + # chapter.pages = "511 - 538" + # chapter.publisher = book.publisher + # authors = [ + # Creator("Jennifer M.", "Phillips").__dict__, + # Creator("Hower", "Kwon").__dict__, + # Creator("Carl", "Feinstein").__dict__, + # Creator("Inco", "Spintczok von Brisinski").__dict__, + # ] + # publishers = book.author + # if isinstance(publishers, str): + # publishers = [publishers] + # for publisher in publishers: + # # #print(publisher) + # creator = Creator().from_string(publisher) + # creator.creatorType = "editor" + # authors.append(creator.__dict__) + + # chapter.creators = authors + # chapter.publisher = book.publisher + # # #print(chapter.to_dict()) + # createBookSection(chapter.to_dict()) + # get_citation("9ZXH8DDE") + # # # #print() + # # #print(get_books()) + # # #print(zot.item_creator_types("bookSection")) diff --git a/src/shared/__init__.py b/src/shared/__init__.py new file mode 100644 index 0000000..fe09d94 --- /dev/null +++ b/src/shared/__init__.py @@ -0,0 +1,6 @@ +"""Shared utilities and cross-cutting concerns.""" + +from .logging import log +from .config import Settings, load_config + +__all__ = ["log", "Settings", "load_config"] diff --git a/src/shared/config.py b/src/shared/config.py new file mode 100644 index 0000000..029dd15 --- /dev/null +++ b/src/shared/config.py @@ -0,0 +1,66 @@ +"""Application configuration and settings.""" + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + +from src.shared.logging import log + + +@dataclass +class Settings: + """Settings for the application.""" + + save_path: str + database_name: str + database_path: str + bib_id: str = "" + default_apps: bool = True + custom_applications: list[dict[str, Any]] = field(default_factory=list) + + def save_settings(self, config_path: str | Path = "config.yaml") -> None: + """Save the settings to the config file. + + Args: + config_path: Path to the configuration file + """ + try: + with open(config_path, "w") as f: + yaml.dump(self.__dict__, f) + log.info(f"Settings saved to {config_path}") + except Exception as e: + log.error(f"Failed to save settings: {e}") + raise + + @classmethod + def load_settings(cls, config_path: str | Path = "config.yaml") -> dict[str, Any]: + """Load the settings from the config file. + + Args: + config_path: Path to the configuration file + + Returns: + Dictionary containing the loaded settings + """ + try: + with open(config_path, "r") as f: + data = yaml.safe_load(f) + log.info(f"Settings loaded from {config_path}") + return data + except Exception as e: + log.error(f"Failed to load settings: {e}") + raise + + +def load_config(config_path: str | Path = "config.yaml") -> dict[str, Any]: + """Convenience function to load configuration. 
+ + Args: + config_path: Path to the configuration file + + Returns: + Dictionary containing the loaded settings + """ + return Settings.load_settings(config_path) diff --git a/src/ui/dialogs/Ui_edit_bookdata.py b/src/ui/dialogs/Ui_edit_bookdata.py index eba44e0..f9865d6 100644 --- a/src/ui/dialogs/Ui_edit_bookdata.py +++ b/src/ui/dialogs/Ui_edit_bookdata.py @@ -8,7 +8,7 @@ from PySide6 import QtCore, QtGui, QtWidgets -from src.logic.dataclass import BookData +from src.core.models import BookData class Ui_Dialog(object): diff --git a/src/ui/dialogs/Ui_fileparser.py b/src/ui/dialogs/Ui_fileparser.py index da869db..72fbb60 100644 --- a/src/ui/dialogs/Ui_fileparser.py +++ b/src/ui/dialogs/Ui_fileparser.py @@ -8,7 +8,7 @@ from PySide6 import QtCore, QtGui, QtWidgets -from src.logic.webrequest import BibTextTransformer, WebRequest +from src.services.webrequest import BibTextTransformer, WebRequest class Ui_Dialog(object): diff --git a/src/ui/dialogs/Ui_login.py b/src/ui/dialogs/Ui_login.py index 7844c6b..b9089dd 100644 --- a/src/ui/dialogs/Ui_login.py +++ b/src/ui/dialogs/Ui_login.py @@ -10,8 +10,8 @@ import hashlib from PySide6 import QtCore, QtWidgets -from src.backend.admin_console import AdminCommands -from src.backend.database import Database +from src.admin import AdminCommands +from src.database import Database class Ui_Dialog(object): diff --git a/src/ui/dialogs/bookdata.py b/src/ui/dialogs/bookdata.py index 553a59f..0a7e34d 100644 --- a/src/ui/dialogs/bookdata.py +++ b/src/ui/dialogs/bookdata.py @@ -1,6 +1,6 @@ from PySide6 import QtWidgets -from src.logic.dataclass import BookData +from src.core.models import BookData from .dialog_sources.edit_bookdata_ui import Ui_Dialog diff --git a/src/ui/dialogs/deletedialog.py b/src/ui/dialogs/deletedialog.py index 03d2e43..0c81bdf 100644 --- a/src/ui/dialogs/deletedialog.py +++ b/src/ui/dialogs/deletedialog.py @@ -3,7 +3,7 @@ from typing import Any from PySide6 import QtCore, QtWidgets from src import Icon -from src.backend.database import Database +from src.database import Database from .dialog_sources.deletedialog_ui import Ui_Dialog diff --git a/src/ui/dialogs/docuprint.py b/src/ui/dialogs/docuprint.py index 750fc10..0339238 100644 --- a/src/ui/dialogs/docuprint.py +++ b/src/ui/dialogs/docuprint.py @@ -2,8 +2,8 @@ from natsort import natsorted from PySide6 import QtWidgets from src import Icon -from src.backend import Database -from src.logic import Semester +from src.database import Database +from src.core.models import Semester from src.utils.richtext import SemapSchilder, SemesterDocument from .dialog_sources.documentprint_ui import Ui_Dialog diff --git a/src/ui/dialogs/elsa_add_entry.py b/src/ui/dialogs/elsa_add_entry.py index b8879ac..17fc5b7 100644 --- a/src/ui/dialogs/elsa_add_entry.py +++ b/src/ui/dialogs/elsa_add_entry.py @@ -1,8 +1,8 @@ from PySide6 import QtWidgets from src import Icon -from src.logic.webrequest import BibTextTransformer, WebRequest -from src.logic.zotero import ZoteroController +from src.services.webrequest import BibTextTransformer, WebRequest +from src.services.zotero import ZoteroController from src.shared.logging import log from src.transformers.transformers import DictToTable diff --git a/src/ui/dialogs/fileparser.py b/src/ui/dialogs/fileparser.py index c0c29cd..edb736f 100644 --- a/src/ui/dialogs/fileparser.py +++ b/src/ui/dialogs/fileparser.py @@ -1,6 +1,6 @@ from PySide6 import QtWidgets -from src.logic.webrequest import BibTextTransformer, WebRequest +from src.services.webrequest import 
BibTextTransformer, WebRequest from .dialog_sources.Ui_fileparser import Ui_Dialog diff --git a/src/ui/dialogs/login.py b/src/ui/dialogs/login.py index 649cbaf..09b9d1f 100644 --- a/src/ui/dialogs/login.py +++ b/src/ui/dialogs/login.py @@ -5,7 +5,7 @@ import loguru from PySide6 import QtCore, QtWidgets from src import LOG_DIR, Icon -from src.backend.database import Database +from src.database import Database from .dialog_sources.login_ui import Ui_Dialog @@ -75,7 +75,7 @@ class LoginDialog(Ui_Dialog): hashed_password = hashlib.sha256(password.encode()).hexdigest() if len(self.db.getUsers()) == 0: - from src.backend.admin_console import AdminCommands + from src.admin import AdminCommands AdminCommands().create_admin() self.lresult = 1 # Indicate successful login diff --git a/src/ui/dialogs/newEdition.py b/src/ui/dialogs/newEdition.py index a103690..77fe6e1 100644 --- a/src/ui/dialogs/newEdition.py +++ b/src/ui/dialogs/newEdition.py @@ -1,7 +1,7 @@ from PySide6 import QtCore, QtWidgets -from src.backend.catalogue import Catalogue -from src.backend.database import Database +from src.services.catalogue import Catalogue +from src.database import Database from src.ui.dialogs.mail import Mail_Dialog from .dialog_sources.order_neweditions_ui import Ui_Dialog diff --git a/src/ui/dialogs/parsed_titles.py b/src/ui/dialogs/parsed_titles.py index dca3cdd..416f38b 100644 --- a/src/ui/dialogs/parsed_titles.py +++ b/src/ui/dialogs/parsed_titles.py @@ -4,7 +4,7 @@ import loguru from PySide6 import QtWidgets from src import LOG_DIR -from src.backend import AutoAdder +from src.background import AutoAdder from .dialog_sources.parsed_titles_ui import Ui_Form diff --git a/src/ui/dialogs/progress.py b/src/ui/dialogs/progress.py index cdeab3b..6abd9c2 100644 --- a/src/ui/dialogs/progress.py +++ b/src/ui/dialogs/progress.py @@ -5,9 +5,9 @@ from PySide6 import QtCore from PySide6.QtWidgets import QDialog, QPushButton, QVBoxLayout from qtqdm import Qtqdm, QtqdmProgressBar -from src.logic import BookData -from src.logic.lehmannsapi import LehmannsClient -from src.logic.SRU import SWB +from src.core.models import BookData +from src.services.lehmanns import LehmannsClient +from src.services.sru import SWB class CheckThread(QtCore.QThread): diff --git a/src/ui/userInterface.py b/src/ui/userInterface.py index efac025..3bf6da8 100644 --- a/src/ui/userInterface.py +++ b/src/ui/userInterface.py @@ -15,24 +15,27 @@ from PySide6.QtGui import QRegularExpressionValidator from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer from src import Icon -from src.backend import ( +from src.database import Database +from src.background import ( AvailChecker, BookGrabber, - Database, DocumentationThread, NewEditionCheckerThread, ) -from src.backend.create_file import recreateFile -from src.backend.delete_temp_contents import delete_temp_contents as tempdelete -from src.logic import ( - APP_NRS, +from src.utils.files import recreateFile, delete_temp_contents as tempdelete +from src.core.models import ( Apparat, ApparatData, BookData, Prof, SemapDocument, Semester, +) +from src.core.constants import APP_NRS +from src.parsers import ( csv_to_list, +) +from src.logic import ( eml_to_semap, pdf_to_semap, word_to_semap, diff --git a/src/ui/widgets/MessageCalendar.py b/src/ui/widgets/MessageCalendar.py index cc03437..4c6b89f 100644 --- a/src/ui/widgets/MessageCalendar.py +++ b/src/ui/widgets/MessageCalendar.py @@ -5,7 +5,7 @@ from PySide6 import QtCore, QtWidgets from PySide6.QtCore import QDate from PySide6.QtGui import QColor, QPen -from 
src.backend import Database +from src.database import Database from src.shared.logging import log color = "#ddfb00" if darkdetect.isDark() else "#2204ff" diff --git a/src/ui/widgets/admin_create_user.py b/src/ui/widgets/admin_create_user.py index d9a0e68..84f4a16 100644 --- a/src/ui/widgets/admin_create_user.py +++ b/src/ui/widgets/admin_create_user.py @@ -1,7 +1,8 @@ from PySide6 import QtWidgets from PySide6.QtCore import Signal from .widget_sources.admin_create_user_ui import Ui_Dialog -from src.backend import AdminCommands, Database +from src.admin import AdminCommands +from src.database import Database class UserCreate(QtWidgets.QDialog, Ui_Dialog): diff --git a/src/ui/widgets/admin_edit_prof.py b/src/ui/widgets/admin_edit_prof.py index 083cded..d94aa5c 100644 --- a/src/ui/widgets/admin_edit_prof.py +++ b/src/ui/widgets/admin_edit_prof.py @@ -4,8 +4,8 @@ import loguru from PySide6 import QtWidgets from src import LOG_DIR -from src.backend import Database -from src.logic import Prof +from src.database import Database +from src.core.models import Prof from .widget_sources.admin_edit_prof_ui import Ui_Dialog # diff --git a/src/ui/widgets/admin_edit_user.py b/src/ui/widgets/admin_edit_user.py index bbf78ca..101911f 100644 --- a/src/ui/widgets/admin_edit_user.py +++ b/src/ui/widgets/admin_edit_user.py @@ -1,6 +1,7 @@ from PySide6 import QtWidgets -from src.backend import AdminCommands, Database +from src.admin import AdminCommands +from src.database import Database from .widget_sources.admin_edit_user_ui import Ui_Dialog diff --git a/src/ui/widgets/admin_query.py b/src/ui/widgets/admin_query.py index c4d6fe1..2c63ace 100644 --- a/src/ui/widgets/admin_query.py +++ b/src/ui/widgets/admin_query.py @@ -1,7 +1,7 @@ from PySide6 import QtCore, QtWidgets from src import Icon -from src.backend import Database +from src.database import Database from .widget_sources. 
import Ui_Form diff --git a/src/ui/widgets/calendar_entry.py b/src/ui/widgets/calendar_entry.py index 2dd68a1..e8327ae 100644 --- a/src/ui/widgets/calendar_entry.py +++ b/src/ui/widgets/calendar_entry.py @@ -2,7 +2,7 @@ from PySide6 import QtWidgets from PySide6.QtCore import Signal from src import Icon -from src.backend.database import Database +from src.database import Database from .widget_sources.calendar_entry_ui import Ui_Dialog diff --git a/src/ui/widgets/elsa_main.py b/src/ui/widgets/elsa_main.py index 6607446..4975b43 100644 --- a/src/ui/widgets/elsa_main.py +++ b/src/ui/widgets/elsa_main.py @@ -5,8 +5,10 @@ from PySide6.QtCore import QDate from PySide6.QtGui import QRegularExpressionValidator from src import Icon -from src.backend import Database, recreateElsaFile -from src.logic import Prof, Semester, elsa_word_to_csv +from src.database import Database +from src.utils.files import recreateElsaFile +from src.core.models import Prof, Semester +from src.logic import elsa_word_to_csv from src.shared.logging import log from src.ui.dialogs import ElsaAddEntry, popus_confirm from src.ui.widgets.filepicker import FilePicker diff --git a/src/ui/widgets/graph.py b/src/ui/widgets/graph.py index ad5d1d8..838744f 100644 --- a/src/ui/widgets/graph.py +++ b/src/ui/widgets/graph.py @@ -5,7 +5,7 @@ from PySide6 import QtCore, QtGui, QtWidgets from PySide6.QtCharts import QCategoryAxis, QChart, QChartView, QLineSeries, QValueAxis from PySide6.QtGui import QColor, QPainter, QPen -from src.logic.semester import Semester +from src.core.models import Semester def mergedicts(d1: dict[str, Any], d2: dict[str, Any]): @@ -101,7 +101,7 @@ class DataQtGraph(QtWidgets.QWidget): self.chart.createDefaultAxes() for entry in lst: - # print("entry:", entry) + print("entry:", entry) entryseries = QLineSeries() for x_val, y_val in zip(entry["x"], entry["y"]): # diff --git a/src/ui/widgets/new_edition_check.py b/src/ui/widgets/new_edition_check.py index 56a36b3..e903265 100644 --- a/src/ui/widgets/new_edition_check.py +++ b/src/ui/widgets/new_edition_check.py @@ -4,8 +4,8 @@ from PySide6 import QtWidgets from PySide6.QtCore import Qt from src import Icon -from src.backend.catalogue import Catalogue -from src.logic import BookData +from src.services.catalogue import Catalogue +from src.core.models import BookData from .widget_sources.new_edition_check_book_ui import ( Ui_Dialog as Ui_NewEditionCheckBook, diff --git a/src/ui/widgets/searchPage.py b/src/ui/widgets/searchPage.py index c8cb0f1..c415161 100644 --- a/src/ui/widgets/searchPage.py +++ b/src/ui/widgets/searchPage.py @@ -4,9 +4,9 @@ from natsort import natsorted from PySide6 import QtCore, QtGui, QtWidgets from PySide6.QtCore import Signal -from src.backend import Database -from src.logic import BookData, Prof, Semester, custom_sort, sort_semesters_list -from src.logic.dataclass import Apparat +from src.core.models import Apparat, BookData, Prof, Semester +from src.database import Database +from src.logic import custom_sort, sort_semesters_list from src.shared.logging import log from src.ui.dialogs import ApparatExtendDialog, Mail_Dialog, ReminderDialog from src.ui.widgets import DataQtGraph, StatusWidget @@ -374,6 +374,7 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog): "x": [i[0] for i in data], "y": {"Erstellt": [i[1] for i in data], "Gelöscht": [i[2] for i in data]}, } + log.debug(graph_data) graph = DataQtGraph( title="Erstellte und gelöschte Apparate", data=graph_data, diff --git a/src/ui/widgets/signature_update.py 
b/src/ui/widgets/signature_update.py index b0b49fe..92f4cf1 100644 --- a/src/ui/widgets/signature_update.py +++ b/src/ui/widgets/signature_update.py @@ -6,10 +6,10 @@ from queue import Empty, Queue from PySide6 import QtCore, QtWidgets from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer -from src.backend.catalogue import Catalogue -from src.backend.database import Database -from src.backend.webadis import get_book_medianr -from src.logic.SRU import SWB +from src.services.catalogue import Catalogue +from src.database import Database +from src.services.webadis import get_book_medianr +from src.services.sru import SWB from src.shared.logging import log from .widget_sources.admin_update_signatures_ui import Ui_Dialog diff --git a/src/ui/widgets/welcome_wizard.py b/src/ui/widgets/welcome_wizard.py index 85418e5..c9a1700 100644 --- a/src/ui/widgets/welcome_wizard.py +++ b/src/ui/widgets/welcome_wizard.py @@ -5,7 +5,7 @@ from appdirs import AppDirs from PySide6 import QtCore, QtWidgets from src import settings -from src.backend import Database +from src.database import Database from src.shared.logging import log from .widget_sources.welcome_wizard_ui import Ui_Wizard @@ -80,7 +80,7 @@ class WelcomeWizard(QtWidgets.QWizard, Ui_Wizard): self.settings_database_name.setText("semesterapparate.db") def test_login_data(self): - from src.backend import AdminCommands + from src.admin import AdminCommands log.info("Testing login data for SAM user") db_path = ( @@ -109,7 +109,7 @@ class WelcomeWizard(QtWidgets.QWizard, Ui_Wizard): def create_sam_user(self): """Create a SAM user in the database.""" - from src.backend import AdminCommands + from src.admin import AdminCommands db_path = ( self.settings_database.text() + "/" + self.settings_database_name.text() diff --git a/src/utils/files.py b/src/utils/files.py new file mode 100644 index 0000000..12f0f81 --- /dev/null +++ b/src/utils/files.py @@ -0,0 +1,100 @@ +"""File operations and management utilities.""" + +import os +from pathlib import Path + +from src import LOG_DIR, settings +from src.database import Database +from src.shared.logging import log + + +def recreate_file(name: str, app_id: int, filetype: str, open_file: bool = True) -> Path: + """ + Recreate a file from the database and optionally open it. + + Args: + name: The filename selected by the user. + app_id: The ID of the apparatus. + filetype: The extension of the file to be created. + open_file: Determines if the file should be opened. Defaults to True. + + Returns: + Absolute path to the file. + """ + db = Database() + path = db.recreateFile(name, app_id, filetype=filetype) + path = Path(path) + log.info(f"File created: {path}") + + if open_file: + path = path.resolve() + if os.getenv("OS") == "Windows_NT": + os.startfile(path) + else: + os.system(f"open {path}") + + return path + + +# Legacy name for backwards compatibility +def recreateFile(name: str, app_id: int, filetype: str, open: bool = True) -> Path: + """Legacy function name - use recreate_file instead.""" + return recreate_file(name, app_id, filetype, open) + + +def recreate_elsa_file(filename: str, filetype: str, open_file: bool = True) -> Path: + """ + Recreate an ELSA file from the database and optionally open it. + + Args: + filename: The filename selected by the user. + filetype: The file extension. + open_file: Determines if the file should be opened. Defaults to True. + + Returns: + Absolute path to the file. 
+ """ + if filename.startswith("(") and filename.endswith(")"): + filename = str(filename[1:-1].replace("'", "")) + + if not isinstance(filename, str): + raise ValueError("filename must be a string") + + db = Database() + path = db.recreateElsaFile(filename, filetype) + path = Path(path) + + if open_file: + path = path.resolve() + if os.getenv("OS") == "Windows_NT": + os.startfile(path) + else: + os.system(f"open {path}") + + return path + + +# Legacy name for backwards compatibility +def recreateElsaFile(filename: str, filetype: str, open: bool = True) -> Path: + """Legacy function name - use recreate_elsa_file instead.""" + return recreate_elsa_file(filename, filetype, open) + + +def delete_temp_contents() -> None: + """Delete the contents of the temp directory.""" + database = settings.database + path = database.temp.expanduser() + + for root, dirs, files in os.walk(path, topdown=False): + for file in files: + try: + os.remove(os.path.join(root, file)) + except Exception as e: + log.warning(f"Could not remove file {file}: {e}") + for dir in dirs: + try: + os.rmdir(os.path.join(root, dir)) + except Exception as e: + log.warning(f"Could not remove directory {dir}: {e}") + + log.info(f"Temp directory cleared: {path}") diff --git a/test.py b/test.py index 5a5da36..7a1ffda 100644 --- a/test.py +++ b/test.py @@ -1,33 +1,15 @@ -from src.logic.semester import Semester +from src.backend.catalogue import Catalogue +from src.logic.webrequest import BibTextTransformer, WebRequest -sem1 = Semester.from_string("WiSe 23/24") -print(sem1.value) -sem2 = Semester.from_string("SoSe 24") -print(sem2.value) -sem3 = Semester() -print(sem3.value) +cat = Catalogue() +result = cat.get_book("3825872475") +print(result) +web = WebRequest() +web.get_ppn("3825872475") +data = web.get_data_elsa() +print(data) +bib = BibTextTransformer() +bib.get_data(result) +book = bib.return_data() -print("Comparing Sem1 with sem2") -assert sem1.isPastSemester(sem2) is True -assert sem1.isFutureSemester(sem2) is False -assert sem1.isMatch(sem2) is False -print("Comparing Sem2 with sem1") -assert sem2.isPastSemester(sem1) is False -assert sem2.isFutureSemester(sem1) is True -assert sem2.isMatch(sem1) is False -print("Comparing Sem1 with sem1") -assert sem1.isPastSemester(sem1) is False -assert sem1.isFutureSemester(sem1) is False -assert sem1.isMatch(sem1) is True -print("Comparing Sem2 with sem2") -assert sem2.isPastSemester(sem2) is False -assert sem2.isFutureSemester(sem2) is False -assert sem2.isMatch(sem2) is True -print("Comparing Sem3 with sem3") -assert sem3.isPastSemester(sem3) is False -assert sem3.isFutureSemester(sem3) is False -assert sem3.isMatch(sem3) is True -print("Comparing Sem1 with sem3") -assert sem1.isPastSemester(sem3) is True -assert sem1.isFutureSemester(sem3) is False -assert sem1.isMatch(sem3) is False +print(book) diff --git a/tests/test_migrations_runner.py b/tests/test_migrations_runner.py index c86561c..2ffbcdb 100644 --- a/tests/test_migrations_runner.py +++ b/tests/test_migrations_runner.py @@ -1,7 +1,7 @@ import sqlite3 as sql from pathlib import Path -from src.backend.database import Database +from src.database import Database p = Path("devtests_test_migrations.db") if p.exists(): diff --git a/uv.lock b/uv.lock index 9ca2491..6605328 100644 --- a/uv.lock +++ b/uv.lock @@ -165,6 +165,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = 
"sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216 }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, +] + [[package]] name = "charset-normalizer" version = "3.4.3" @@ -226,6 +235,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/f4/7b7fdbb613992013c4518a0bf8fee2915f79ec07bcfa6180569bca7fa8ef/comtypes-1.4.11-py3-none-any.whl", hash = "sha256:1760d5059ca7ca1d61b574c998378d879c271a86c41f88926619ea97497592bb", size = 246365 }, ] +[[package]] +name = "cssselect" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/0a/c3ea9573b1dc2e151abfe88c7fe0c26d1892fe6ed02d0cdb30f0d57029d5/cssselect-1.3.0.tar.gz", hash = "sha256:57f8a99424cfab289a1b6a816a43075a4b00948c86b4dcf3ef4ee7e15f7ab0c7", size = 42870 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786 }, +] + [[package]] name = "darkdetect" version = "0.8.0" @@ -820,6 +838,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, ] +[[package]] +name = "pdfminer" +version = "20191125" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycryptodome" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/a3/155c5cde5f9c0b1069043b2946a93f54a41fd72cc19c6c100f6f2f5bdc15/pdfminer-20191125.tar.gz", hash = "sha256:9e700bc731300ed5c8936343c1dd4529638184198e54e91dd2b59b64a755dc01", size = 4173248 } + +[[package]] +name = "pdfquery" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "chardet" }, + { name = "cssselect" }, + { name = "lxml" }, + { name = "pdfminer" }, + { name = "pyquery" }, + { name = "roman" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/ed/caf087d2d65ceef10fb117af79bbab50ea3a24ed8b1dc8abb0dc8039d2d3/pdfquery-0.4.3.tar.gz", hash = "sha256:a2a2974cb312fda4f569adc8d63377d25d5c6367240b4a7bfb165392c73e1dce", size = 17489 } + [[package]] name = "plaster" version = "1.1.2" @@ -882,6 +924,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810 }, ] +[[package]] +name = "pycryptodome" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/a6/8452177684d5e906854776276ddd34eca30d1b1e15aa1ee9cefc289a33f5/pycryptodome-3.23.0.tar.gz", hash = 
"sha256:447700a657182d60338bab09fdb27518f8856aecd80ae4c6bdddb67ff5da44ef", size = 4921276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/5d/bdb09489b63cd34a976cc9e2a8d938114f7a53a74d3dd4f125ffa49dce82/pycryptodome-3.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:0011f7f00cdb74879142011f95133274741778abba114ceca229adbf8e62c3e4", size = 2495152 }, + { url = "https://files.pythonhosted.org/packages/a7/ce/7840250ed4cc0039c433cd41715536f926d6e86ce84e904068eb3244b6a6/pycryptodome-3.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:90460fc9e088ce095f9ee8356722d4f10f86e5be06e2354230a9880b9c549aae", size = 1639348 }, + { url = "https://files.pythonhosted.org/packages/ee/f0/991da24c55c1f688d6a3b5a11940567353f74590734ee4a64294834ae472/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4764e64b269fc83b00f682c47443c2e6e85b18273712b98aa43bcb77f8570477", size = 2184033 }, + { url = "https://files.pythonhosted.org/packages/54/16/0e11882deddf00f68b68dd4e8e442ddc30641f31afeb2bc25588124ac8de/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb8f24adb74984aa0e5d07a2368ad95276cf38051fe2dc6605cbcf482e04f2a7", size = 2270142 }, + { url = "https://files.pythonhosted.org/packages/d5/fc/4347fea23a3f95ffb931f383ff28b3f7b1fe868739182cb76718c0da86a1/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d97618c9c6684a97ef7637ba43bdf6663a2e2e77efe0f863cce97a76af396446", size = 2309384 }, + { url = "https://files.pythonhosted.org/packages/6e/d9/c5261780b69ce66d8cfab25d2797bd6e82ba0241804694cd48be41add5eb/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9a53a4fe5cb075075d515797d6ce2f56772ea7e6a1e5e4b96cf78a14bac3d265", size = 2183237 }, + { url = "https://files.pythonhosted.org/packages/5a/6f/3af2ffedd5cfa08c631f89452c6648c4d779e7772dfc388c77c920ca6bbf/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:763d1d74f56f031788e5d307029caef067febf890cd1f8bf61183ae142f1a77b", size = 2343898 }, + { url = "https://files.pythonhosted.org/packages/9a/dc/9060d807039ee5de6e2f260f72f3d70ac213993a804f5e67e0a73a56dd2f/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:954af0e2bd7cea83ce72243b14e4fb518b18f0c1649b576d114973e2073b273d", size = 2269197 }, + { url = "https://files.pythonhosted.org/packages/f9/34/e6c8ca177cb29dcc4967fef73f5de445912f93bd0343c9c33c8e5bf8cde8/pycryptodome-3.23.0-cp313-cp313t-win32.whl", hash = "sha256:257bb3572c63ad8ba40b89f6fc9d63a2a628e9f9708d31ee26560925ebe0210a", size = 1768600 }, + { url = "https://files.pythonhosted.org/packages/e4/1d/89756b8d7ff623ad0160f4539da571d1f594d21ee6d68be130a6eccb39a4/pycryptodome-3.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6501790c5b62a29fcb227bd6b62012181d886a767ce9ed03b303d1f22eb5c625", size = 1799740 }, + { url = "https://files.pythonhosted.org/packages/5d/61/35a64f0feaea9fd07f0d91209e7be91726eb48c0f1bfc6720647194071e4/pycryptodome-3.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:9a77627a330ab23ca43b48b130e202582e91cc69619947840ea4d2d1be21eb39", size = 1703685 }, + { url = "https://files.pythonhosted.org/packages/db/6c/a1f71542c969912bb0e106f64f60a56cc1f0fabecf9396f45accbe63fa68/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:187058ab80b3281b1de11c2e6842a357a1f71b42cb1e15bce373f3d238135c27", size = 2495627 }, + { url = 
"https://files.pythonhosted.org/packages/6e/4e/a066527e079fc5002390c8acdd3aca431e6ea0a50ffd7201551175b47323/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cfb5cd445280c5b0a4e6187a7ce8de5a07b5f3f897f235caa11f1f435f182843", size = 1640362 }, + { url = "https://files.pythonhosted.org/packages/50/52/adaf4c8c100a8c49d2bd058e5b551f73dfd8cb89eb4911e25a0c469b6b4e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67bd81fcbe34f43ad9422ee8fd4843c8e7198dd88dd3d40e6de42ee65fbe1490", size = 2182625 }, + { url = "https://files.pythonhosted.org/packages/5f/e9/a09476d436d0ff1402ac3867d933c61805ec2326c6ea557aeeac3825604e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8987bd3307a39bc03df5c8e0e3d8be0c4c3518b7f044b0f4c15d1aa78f52575", size = 2268954 }, + { url = "https://files.pythonhosted.org/packages/f9/c5/ffe6474e0c551d54cab931918127c46d70cab8f114e0c2b5a3c071c2f484/pycryptodome-3.23.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa0698f65e5b570426fc31b8162ed4603b0c2841cbb9088e2b01641e3065915b", size = 2308534 }, + { url = "https://files.pythonhosted.org/packages/18/28/e199677fc15ecf43010f2463fde4c1a53015d1fe95fb03bca2890836603a/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:53ecbafc2b55353edcebd64bf5da94a2a2cdf5090a6915bcca6eca6cc452585a", size = 2181853 }, + { url = "https://files.pythonhosted.org/packages/ce/ea/4fdb09f2165ce1365c9eaefef36625583371ee514db58dc9b65d3a255c4c/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:156df9667ad9f2ad26255926524e1c136d6664b741547deb0a86a9acf5ea631f", size = 2342465 }, + { url = "https://files.pythonhosted.org/packages/22/82/6edc3fc42fe9284aead511394bac167693fb2b0e0395b28b8bedaa07ef04/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:dea827b4d55ee390dc89b2afe5927d4308a8b538ae91d9c6f7a5090f397af1aa", size = 2267414 }, + { url = "https://files.pythonhosted.org/packages/59/fe/aae679b64363eb78326c7fdc9d06ec3de18bac68be4b612fc1fe8902693c/pycryptodome-3.23.0-cp37-abi3-win32.whl", hash = "sha256:507dbead45474b62b2bbe318eb1c4c8ee641077532067fec9c1aa82c31f84886", size = 1768484 }, + { url = "https://files.pythonhosted.org/packages/54/2f/e97a1b8294db0daaa87012c24a7bb714147c7ade7656973fd6c736b484ff/pycryptodome-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:c75b52aacc6c0c260f204cbdd834f76edc9fb0d8e0da9fbf8352ef58202564e2", size = 1799636 }, + { url = "https://files.pythonhosted.org/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c", size = 1703675 }, + { url = "https://files.pythonhosted.org/packages/9f/7c/f5b0556590e7b4e710509105e668adb55aa9470a9f0e4dea9c40a4a11ce1/pycryptodome-3.23.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:350ebc1eba1da729b35ab7627a833a1a355ee4e852d8ba0447fafe7b14504d56", size = 1705791 }, + { url = "https://files.pythonhosted.org/packages/33/38/dcc795578d610ea1aaffef4b148b8cafcfcf4d126b1e58231ddc4e475c70/pycryptodome-3.23.0-pp27-pypy_73-win32.whl", hash = "sha256:93837e379a3e5fd2bb00302a47aee9fdf7940d83595be3915752c74033d17ca7", size = 1780265 }, +] + [[package]] name = "pydantic" version = "2.11.7" @@ -982,6 +1056,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 }, ] +[[package]] +name = "pyquery" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cssselect" }, + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/48/79e774ea00b671d08867f06d9258203be81834236c150ac00e942d8fc4db/pyquery-2.0.1.tar.gz", hash = "sha256:0194bb2706b12d037db12c51928fe9ebb36b72d9e719565daba5a6c595322faf", size = 44999 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/f5/5067b48012967ea166b9bd0a015b69e0560e4c6e7c06f28d9bab8f9dd10b/pyquery-2.0.1-py3-none-any.whl", hash = "sha256:aedfa0bd0eb9afc94b3ddbec8f375a6362b32bc9662f46e3e0d866483f4771b0", size = 22573 }, +] + [[package]] name = "pyramid" version = "2.0.2" @@ -1210,6 +1297,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b6/c2/9fce4c8a9587c4e90500114d742fe8ef0fd92d7bad29d136bb9941add271/rich_click-1.8.9-py3-none-any.whl", hash = "sha256:c3fa81ed8a671a10de65a9e20abf642cfdac6fdb882db1ef465ee33919fbcfe2", size = 36082 }, ] +[[package]] +name = "roman" +version = "5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/86/8bdb59db4b7ea9a2bd93f8d25298981e09a4c9f4744cf4cbafa7ef6fee7b/roman-5.1.tar.gz", hash = "sha256:3a86572e9bc9183e771769601189e5fa32f1620ffeceebb9eca836affb409986", size = 8066 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/d0/27c9840ddaf331ace898c7f4aa1e1304a7acc22b844b5420fabb6d14c3a0/roman-5.1-py3-none-any.whl", hash = "sha256:bf595d8a9bc4a8e8b1dfa23e1d4def0251b03b494786df6b8c3d3f1635ce285a", size = 5825 }, +] + [[package]] name = "semesterapparatsmanager" version = "1.0.0" @@ -1231,6 +1327,7 @@ dependencies = [ { name = "omegaconf" }, { name = "openai" }, { name = "pandas" }, + { name = "pdfquery" }, { name = "playwright" }, { name = "pyramid" }, { name = "pyside6" }, @@ -1268,6 +1365,7 @@ requires-dist = [ { name = "omegaconf", specifier = ">=2.3.0" }, { name = "openai", specifier = ">=1.79.0" }, { name = "pandas", specifier = ">=2.2.3" }, + { name = "pdfquery", specifier = ">=0.4.3" }, { name = "playwright", specifier = ">=1.49.1" }, { name = "pyramid", specifier = ">=2.0.2" }, { name = "pyside6", specifier = ">=6.9.1" },