Compare commits
82 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d6883b0388 | ||
| 9e64d10bf4 | |||
|
9f1dfa1030
|
|||
|
|
8c42d5fa45 | ||
| a8ebaee154 | |||
|
84b00409f7
|
|||
|
2d54d64a46
|
|||
|
6c6d140c2f
|
|||
| ee62c65ae7 | |||
| a4460ec17b | |||
| ab62212201 | |||
| dcb5a44ceb | |||
| f63bcc8446 | |||
| 0764a6b06a | |||
| ef21a18b87 | |||
| 0406fe4f6f | |||
| 6af34bf2df | |||
| 560d8285b5 | |||
| b30d1aac47 | |||
| 3cc6e793d2 | |||
| 7e07bdea0c | |||
| 06965db26a | |||
| 0df7fd9fe6 | |||
| 713dbc1a1d | |||
| e061c1f5a9 | |||
| 8e9eff4f3a | |||
| 6a11b3482e | |||
| d35b2e816e | |||
| 11d5d67538 | |||
| ebf8363b2a | |||
| a2631570ec | |||
| 9831aa3a62 | |||
| c4be1d8bfa | |||
| 7079b4d47f | |||
| 65c86a65cd | |||
| f4e75831d5 | |||
| 4f28cfe55c | |||
| 8b8c1c9393 | |||
| 247db562b1 | |||
| 1263faa23f | |||
| fd6684cc47 | |||
| 1ee7901d49 | |||
| e934a2b3f1 | |||
| 7be9dba9ca | |||
| 6f21c22d22 | |||
| 1f34442397 | |||
| 373257864f | |||
| b577a69dad | |||
| a64fa9770f | |||
| 0061708785 | |||
| a3b68c2b77 | |||
| 0ac5051aef | |||
| bf419ec3bf | |||
| c6f356fda4 | |||
| 087b8753fb | |||
| 09ec304637 | |||
| f6ab64a8ee | |||
| 4254567bfb | |||
| 9ce46abdce | |||
| 8cce13f6e5 | |||
| f22cbcd26a | |||
| 6f22186b67 | |||
| a231213276 | |||
| b344d806e2 | |||
| 0e3199e289 | |||
| c00eb102ff | |||
| 63b2a1b7a3 | |||
| 5a4156ba04 | |||
| af53b0310f | |||
| ce7d22b26b | |||
| 5f15352401 | |||
| 7da2b3f65d | |||
| 5bf5eeae00 | |||
| c6cbb1d825 | |||
| 3bfb788f42 | |||
| 1c5dfc8f3e | |||
| 4c26aa8d21 | |||
| b67a160e7a | |||
| d8fabdbe11 | |||
| ee8ea9dfda | |||
| ec0f72337d | |||
| 6ae52b6626 |
@@ -1,59 +1,223 @@
|
||||
name: Build and Release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_notes:
|
||||
description: Release notes (use \n for newlines)
|
||||
type: string
|
||||
required: false
|
||||
github_release:
|
||||
description: 'Create Gitea Release'
|
||||
description: "Create Gitea Release"
|
||||
default: true
|
||||
type: boolean
|
||||
prerelease:
|
||||
description: "Is this a prerelease?"
|
||||
default: false
|
||||
type: boolean
|
||||
bump:
|
||||
description: 'Bump type'
|
||||
description: "Bump type"
|
||||
required: false
|
||||
default: 'patch'
|
||||
default: "patch"
|
||||
type: choice
|
||||
options:
|
||||
- 'major'
|
||||
- 'minor'
|
||||
- 'patch'
|
||||
- "major"
|
||||
- "minor"
|
||||
- "patch"
|
||||
|
||||
env:
|
||||
BASE_URL: "http://192.168.178.110:3000"
|
||||
|
||||
jobs:
|
||||
bump:
|
||||
prepare:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.bump.outputs.version }}
|
||||
tag: ${{ steps.bump.outputs.tag }}
|
||||
changelog: ${{ steps.build_changelog.outputs.changelog }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install UV
|
||||
fetch-tags: true
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python
|
||||
run: uv python install
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
# Uses the version specified in pyproject.toml
|
||||
python-version-file: "pyproject.toml"
|
||||
|
||||
- name: Set Git identity
|
||||
run: |
|
||||
git config user.name "Gitea CI"
|
||||
git config user.email "ci@git.theprivateserver.de"
|
||||
|
||||
- name: Bump version
|
||||
id: bump
|
||||
run: |
|
||||
uv tool install bump-my-version
|
||||
|
||||
uv tool run bump-my-version bump "${{ github.event.inputs.bump }}"
|
||||
version="$(uv tool run bump-my-version show current_version)"
|
||||
|
||||
echo "VERSION=$version" >> "$GITHUB_ENV"
|
||||
echo "version=$version" >> "$GITHUB_OUTPUT"
|
||||
echo "tag=v$version" >> "$GITHUB_OUTPUT"
|
||||
# no env needed here, uv handles the Python it installs
|
||||
|
||||
- name: Push changes
|
||||
uses: ad-m/github-push-action@master
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
branch: ${{ github.ref }}
|
||||
|
||||
- name: Build Changelog
|
||||
id: build_changelog
|
||||
uses: https://github.com/mikepenz/release-changelog-builder-action@v6.0.1
|
||||
with:
|
||||
platform: "gitea"
|
||||
baseURL: "${{ env.BASE_URL }}"
|
||||
configuration: ".gitea/changelog_config.json"
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
|
||||
build-linux:
|
||||
needs: prepare
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
VERSION: ${{ needs.prepare.outputs.version }}
|
||||
TAG_NAME: ${{ needs.prepare.outputs.tag }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
fetch-tags: true
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version-file: "pyproject.toml"
|
||||
- name: Install dependencies
|
||||
run: uv sync --locked --all-extras --dev
|
||||
- name: Install Bump tool
|
||||
run: uv tool install bump-my-version
|
||||
- name: Bump version
|
||||
id: bump_version
|
||||
|
||||
- name: Install all dependencies
|
||||
run: uv sync --all-groups
|
||||
|
||||
- name: Build Linux release with Nuitka
|
||||
run: |
|
||||
uv tool run bump-my-version bump ${{ github.event.inputs.bump }} --tag --allow-dirty
|
||||
- name: Add release notes
|
||||
id: add_release_notes
|
||||
uv run python -m nuitka \
|
||||
--standalone \
|
||||
--output-dir=dist \
|
||||
--include-data-dir=./config=config \
|
||||
--include-data-dir=./site=site \
|
||||
--include-data-dir=./icons=icons \
|
||||
--include-data-dir=./mail_vorlagen=mail_vorlagen \
|
||||
--enable-plugin=pyside6 \
|
||||
--product-name=SemesterApparatsManager \
|
||||
--product-version=${VERSION} \
|
||||
--output-filename=SAM \
|
||||
main.py
|
||||
|
||||
- name: Prepare Linux Release Artifact
|
||||
run: |
|
||||
echo "RELEASE_NOTES<<EOF" >> $GITHUB_ENV
|
||||
echo "${{ github.event.inputs.release_notes }}" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
- name: Create Gitea Release
|
||||
mkdir -p releases
|
||||
cd dist/SemesterApparatsManager.dist
|
||||
zip -r "../../releases/SAM-linux-v${VERSION}.zip" *
|
||||
cd ../../
|
||||
|
||||
- name: Create / Update Gitea Release (Linux asset + changelog)
|
||||
if: ${{ github.event.inputs.github_release == 'true' }}
|
||||
uses: softprops/action-gh-release@v1
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name:
|
||||
release_name: Release ${{ github.sha }}
|
||||
body: ${{ env.RELEASE_NOTES }}
|
||||
tag_name: ${{ env.TAG_NAME }}
|
||||
name: Release ${{ env.TAG_NAME }}
|
||||
body: ${{ needs.prepare.outputs.changelog }}
|
||||
draft: false
|
||||
prerelease: false
|
||||
prerelease: ${{ github.event.inputs.prerelease }}
|
||||
make_latest: true
|
||||
files: |
|
||||
releases/SAM-linux-v${{ env.VERSION }}.zip
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
|
||||
build-windows:
|
||||
needs: [prepare, build-linux]
|
||||
runs-on: windows-latest
|
||||
env:
|
||||
VERSION: ${{ needs.prepare.outputs.version }}
|
||||
TAG_NAME: ${{ needs.prepare.outputs.tag }}
|
||||
UV_PATH: 'C:\Users\gitea_runner_windows\.local\bin\uv.exe'
|
||||
UV_NO_PROJECT: "1"
|
||||
UV_NO_CONFIG: "1"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Ensure Python via uv
|
||||
shell: powershell
|
||||
run: |
|
||||
if (-not (Test-Path $env:UV_PATH)) {
|
||||
Write-Error "uv not found at $env:UV_PATH"
|
||||
exit 1
|
||||
}
|
||||
|
||||
$version = "3.12"
|
||||
Write-Host "Checking for Python $version via uv..."
|
||||
$exists = & $env:UV_PATH python list | Select-String $version -Quiet
|
||||
|
||||
if (-not $exists) {
|
||||
Write-Host "Python $version not found; installing with uv..."
|
||||
& $env:UV_PATH python install $version
|
||||
} else {
|
||||
Write-Host "Python $version already installed in uv."
|
||||
}
|
||||
|
||||
- name: Install build dependencies
|
||||
shell: powershell
|
||||
run: |
|
||||
& $env:UV_PATH sync --all-groups
|
||||
|
||||
- name: Build Windows release with Nuitka
|
||||
shell: powershell
|
||||
run: |
|
||||
& $env:UV_PATH run --python 3.12 python -m nuitka `
|
||||
--standalone `
|
||||
--assume-yes-for-downloads `
|
||||
--output-dir=dist `
|
||||
--mingw64 `
|
||||
--include-data-dir=./config=config `
|
||||
--include-data-dir=./site=site `
|
||||
--include-data-dir=./icons=icons `
|
||||
--include-data-dir=./mail_vorlagen=mail_vorlagen `
|
||||
--enable-plugin=pyside6 `
|
||||
--product-name=SemesterApparatsManager `
|
||||
--product-version=${env:VERSION} `
|
||||
--output-filename=SAM.exe `
|
||||
main.py
|
||||
|
||||
- name: Prepare Windows Release Artifact
|
||||
shell: powershell
|
||||
run: |
|
||||
New-Item -ItemType Directory -Force -Path releases | Out-Null
|
||||
Set-Location dist
|
||||
Compress-Archive -Path * -DestinationPath "..\releases\SAM-windows-v${env:VERSION}.zip" -Force
|
||||
Set-Location ..
|
||||
|
||||
- name: Attach Windows asset to Gitea Release
|
||||
if: ${{ github.event.inputs.github_release == 'true' }}
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.TAG_NAME }}
|
||||
draft: false
|
||||
prerelease: ${{ github.event.inputs.prerelease }}
|
||||
files: |
|
||||
releases/SAM-windows-v${{ env.VERSION }}.zip
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
# GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
|
||||
216
MIGRATION_GUIDE.md
Normal file
216
MIGRATION_GUIDE.md
Normal file
@@ -0,0 +1,216 @@
|
||||
# Migration Guide: New File Structure
|
||||
|
||||
## Overview
|
||||
|
||||
The codebase has been reorganized to improve clarity, maintainability, and separation of concerns. This guide shows how to update your imports.
|
||||
|
||||
## New Structure Summary
|
||||
|
||||
```
|
||||
src/
|
||||
├── core/ # Domain models & constants (formerly in logic/)
|
||||
├── database/ # Data persistence (formerly in backend/)
|
||||
├── services/ # External APIs (from backend/ and logic/)
|
||||
├── parsers/ # File parsing (formerly in logic/)
|
||||
├── documents/ # Document generation (formerly in utils/)
|
||||
├── background/ # Threading tasks (formerly in backend/)
|
||||
├── admin/ # Admin commands (formerly in backend/)
|
||||
├── shared/ # Cross-cutting concerns (logging, config)
|
||||
├── utils/ # Pure utilities
|
||||
├── ui/ # UI components (unchanged)
|
||||
└── errors/ # Custom exceptions (unchanged)
|
||||
```
|
||||
|
||||
## Import Changes
|
||||
|
||||
### Core Domain Models
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.logic import BookData, Prof, Semester, Apparat
|
||||
from src.logic.dataclass import BookData, Prof
|
||||
from src.logic.semester import Semester
|
||||
from src.logic.constants import APP_NRS, SEMAP_MEDIA_ACCOUNTS
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.core.models import BookData, Prof, Semester, Apparat, ApparatData
|
||||
from src.core import BookData, Prof, Semester # Can use shorthand
|
||||
from src.core.semester import Semester
|
||||
from src.core.constants import APP_NRS, SEMAP_MEDIA_ACCOUNTS
|
||||
```
|
||||
|
||||
### Database
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.backend import Database
|
||||
from src.backend.database import Database
|
||||
from src.backend.db import CREATE_TABLE_MEDIA
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.database import Database
|
||||
from src.database.connection import Database # If you need specific module
|
||||
from src.database.schemas import CREATE_TABLE_MEDIA
|
||||
```
|
||||
|
||||
### External Services & APIs
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.backend.catalogue import Catalogue
|
||||
from src.backend.webadis import get_book_medianr
|
||||
from src.logic.SRU import SWB
|
||||
from src.logic.lehmannsapi import LehmannsClient
|
||||
from src.logic.zotero import ZoteroController
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.services import Catalogue, SWB, LehmannsClient, ZoteroController
|
||||
from src.services.catalogue import Catalogue
|
||||
from src.services.webadis import get_book_medianr
|
||||
from src.services.sru import SWB
|
||||
from src.services.lehmanns import LehmannsClient
|
||||
from src.services.zotero import ZoteroController
|
||||
from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
```
|
||||
|
||||
### Parsers
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.logic import csv_to_list, word_to_semap
|
||||
from src.logic.csvparser import csv_to_list
|
||||
from src.logic.wordparser import word_to_semap
|
||||
from src.logic.pdfparser import pdf_to_text
|
||||
from src.logic.xmlparser import xml_to_dict
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.parsers import csv_to_list, word_to_semap # Lazy loading
|
||||
from src.parsers.csv_parser import csv_to_list
|
||||
from src.parsers.word_parser import word_to_semap
|
||||
from src.parsers.pdf_parser import pdf_to_text
|
||||
from src.parsers.xml_parser import xml_to_dict
|
||||
```
|
||||
|
||||
### Document Generation
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.utils.richtext import create_document, create_pdf
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.documents import create_document, create_pdf
|
||||
from src.documents.generators import create_document, create_pdf
|
||||
```
|
||||
|
||||
### Background Tasks
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.backend import AutoAdder, AvailChecker, BookGrabber
|
||||
from src.backend.threads_autoadder import AutoAdder
|
||||
from src.backend.threads_availchecker import AvailChecker
|
||||
from src.backend.thread_bookgrabber import BookGrabber
|
||||
from src.backend.thread_neweditions import NewEditionCheckerThread
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.background import AutoAdder, AvailChecker, BookGrabber, NewEditionCheckerThread
|
||||
from src.background.autoadder import AutoAdder
|
||||
from src.background.availability_checker import AvailChecker
|
||||
from src.background.book_grabber import BookGrabber
|
||||
from src.background.new_editions import NewEditionCheckerThread
|
||||
```
|
||||
|
||||
### Admin Commands
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.backend import AdminCommands
|
||||
from src.backend.admin_console import AdminCommands
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.admin import AdminCommands
|
||||
from src.admin.commands import AdminCommands
|
||||
```
|
||||
|
||||
### Configuration & Logging
|
||||
|
||||
**OLD:**
|
||||
```python
|
||||
from src.backend.settings import Settings
|
||||
from src.logic.settings import Settings
|
||||
from src.shared.logging import log # This stays the same
|
||||
```
|
||||
|
||||
**NEW:**
|
||||
```python
|
||||
from src.shared import Settings, load_config, log
|
||||
from src.shared.config import Settings, load_config
|
||||
from src.shared.logging import log
|
||||
```
|
||||
|
||||
## File Renames
|
||||
|
||||
| Old Path | New Path |
|
||||
|----------|----------|
|
||||
| `logic/dataclass.py` | `core/models.py` |
|
||||
| `logic/SRU.py` | `services/sru.py` |
|
||||
| `logic/lehmannsapi.py` | `services/lehmanns.py` |
|
||||
| `backend/database.py` | `database/connection.py` |
|
||||
| `backend/db.py` | `database/schemas.py` |
|
||||
| `backend/threads_autoadder.py` | `background/autoadder.py` |
|
||||
| `backend/threads_availchecker.py` | `background/availability_checker.py` |
|
||||
| `backend/thread_bookgrabber.py` | `background/book_grabber.py` |
|
||||
| `backend/thread_neweditions.py` | `background/new_editions.py` |
|
||||
| `backend/admin_console.py` | `admin/commands.py` |
|
||||
| `utils/richtext.py` | `documents/generators.py` |
|
||||
| `logic/csvparser.py` | `parsers/csv_parser.py` |
|
||||
| `logic/pdfparser.py` | `parsers/pdf_parser.py` |
|
||||
| `logic/wordparser.py` | `parsers/word_parser.py` |
|
||||
| `logic/xmlparser.py` | `parsers/xml_parser.py` |
|
||||
|
||||
## Quick Migration Checklist
|
||||
|
||||
1. ✅ Update all `from src.backend import Database` → `from src.database import Database`
|
||||
2. ✅ Update all `from src.logic import BookData` → `from src.core.models import BookData`
|
||||
3. ✅ Update all `from src.backend.catalogue` → `from src.services.catalogue`
|
||||
4. ✅ Update all `from src.logic.SRU` → `from src.services.sru`
|
||||
5. ✅ Update all `from src.backend.admin_console` → `from src.admin`
|
||||
6. ✅ Update threading imports from `src.backend.thread*` → `src.background.*`
|
||||
|
||||
## Benefits
|
||||
|
||||
- **Clearer architecture**: Each folder has a specific, well-defined purpose
|
||||
- **Better dependency flow**: core → database/services → background → ui
|
||||
- **Reduced duplication**: Merged 3 duplicate files (pickles.py, settings.py)
|
||||
- **Easier navigation**: Intuitive folder names ("services" vs "logic")
|
||||
- **Scalability**: Clear extension points for new features
|
||||
|
||||
## Backwards Compatibility
|
||||
|
||||
The old `backend/` and `logic/` folders still exist with their original files. They will be removed in a future cleanup phase after thorough testing.
|
||||
|
||||
## Questions?
|
||||
|
||||
If you encounter import errors:
|
||||
1. Check this guide for the new import path
|
||||
2. Search for the class/function name in the new structure
|
||||
3. Most moves follow the pattern: external APIs → `services/`, data models → `core/`, threads → `background/`
|
||||
|
||||
## Status
|
||||
|
||||
✅ **Migration Complete** - Application successfully starts and runs with new structure!
|
||||
451
README.md
451
README.md
@@ -1,27 +1,438 @@
|
||||
# SemesterapparatsManager
|
||||
|
||||
SemesterapparatsManager is a graphical tool for managing semester apparatuses at the University of Education Freiburg. It allows users to manage the semester apparatuses in a user-friendly way. Its functions include management of physical and digital semester apparatuses, as well as creating the citations for the digital files of the digital semester apparatuses. For that it uses Zotero, an open-source reference management software. The semester apparatuses are stored in a SQLite database, which is created and managed by the SemesterapparatsManager. The SemesterapparatsManager is written in Python and uses the PyQt6 library for the graphical user interface.
|
||||
[Python](https://www.python.org/downloads/)
|
||||
[Qt for Python (PySide6)](https://doc.qt.io/qtforpython/)
|
||||
[License](LICENSE)
|
||||
|
||||
A comprehensive desktop application for managing semester course reserve collections (Semesterapparate) at the University of Education Freiburg. This tool streamlines the workflow of creating, managing, and maintaining both physical and digital course reserves, with integrated citation management powered by Zotero.
|
||||
|
||||
## Features
|
||||
- Manage physical semester apparatuses
|
||||
- Add semester apparatuses
|
||||
- Edit semester apparatuses
|
||||
- Delete semester apparatuses
|
||||
- Extend semester apparatuses
|
||||
- Notify professors about semester apparatuses creation or deletion
|
||||
- Add messages to all semester apparatuses, or an individual semester apparatus
|
||||
- Manage digital semester apparatuses
|
||||
- Use text parsing to extract information from the submitted form and create the scans
|
||||
- If multiple parts of a book are used, it can be split into the parts
|
||||
- Create the matching citations for the files
|
||||
- Statistics and Search
|
||||
- Search semester apparatuses by various criteria
|
||||
- Show statistics about the semester apparatuses creation and deletion
|
||||
- Edit user data
|
||||
## 📋 Table of Contents
|
||||
|
||||
- [Overview](#overview)
|
||||
- [Features](#features)
|
||||
- [Architecture](#architecture)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Development](#development)
|
||||
- [Documentation](#documentation)
|
||||
- [Contributing](#contributing)
|
||||
- [License](#license)
|
||||
|
||||
## Images
|
||||
## 🎯 Overview
|
||||
|
||||

|
||||

|
||||
SemesterapparatsManager is a Python-based graphical application designed to simplify the complex workflow of academic course reserve management. It provides librarians and staff with tools to:
|
||||
|
||||
- **Manage Physical Reserves**: Track books, media, and materials reserved for courses
|
||||
- **Handle Digital Collections**: Process, scan, and catalog digital course materials
|
||||
- **Automate Citations**: Generate proper bibliographic citations using Zotero integration
|
||||
- **Communicate**: Send automated emails to professors about reserve status
|
||||
- **Analyze**: View statistics and search through historical data
|
||||
- **Integrate**: Connect with library catalogs (SWB, DNB) and vendor APIs (Lehmanns)
|
||||
|
||||
### Key Technologies
|
||||
|
||||
- **Framework**: PySide6 (Qt6) for cross-platform GUI
|
||||
- **Database**: SQLite with migration support
|
||||
- **APIs**: Integration with SWB, DNB, Zotero, OpenAI, and catalog services
|
||||
- **Document Processing**: Word, PDF, CSV, and XML parsing
|
||||
- **Bibliography**: Zotero-based citation management
|
||||
|
||||
## ✨ Features
|
||||
|
||||
### Course Reserve Management
|
||||
|
||||
- **Create & Edit**: Add new semester apparatus with book and media entries
|
||||
- **Extend Duration**: Extend existing reserves for additional semesters
|
||||
- **Smart Search**: Find reserves by semester, professor, subject, or signature
|
||||
- **Availability Checking**: Automated checks against library catalog
|
||||
- **New Edition Detection**: Background thread to find newer editions of books
|
||||
|
||||
### Digital Collection Features
|
||||
|
||||
- **Document Parsing**: Extract information from submitted Word/PDF forms
|
||||
- **Smart Splitting**: Automatically split multi-part book sections
|
||||
- **Citation Generation**: Create proper citations for all digital files
|
||||
- **ELSA Integration**: Manage electronic semester apparatus (ELSA) workflows
|
||||
- **File Management**: Track and recreate files from database
|
||||
|
||||
### Communication & Notifications
|
||||
|
||||
- **Email Templates**: Pre-configured templates for common scenarios
|
||||
- **Professor Notifications**: Automated emails for creation, extension, or dissolution
|
||||
- **Message System**: Attach messages to specific reserves or broadcast to all
|
||||
|
||||
### Data & Analytics
|
||||
|
||||
- **Statistics Dashboard**: Visualize creation and deletion trends
|
||||
- **Advanced Search**: Multi-criteria search across all reserves
|
||||
- **Export**: Generate reports and documentation
|
||||
- **Calendar View**: Timeline of reserve activities
|
||||
|
||||
### Administration
|
||||
|
||||
- **User Management**: Create, edit, and delete system users
|
||||
- **Professor Database**: Maintain professor contact information
|
||||
- **Settings Configuration**: Customize database paths, temp directories, API keys
|
||||
- **Backup & Migration**: Database migration support for schema updates
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
SemesterapparatsManager/
|
||||
├── src/
|
||||
│ ├── core/ # Domain models & constants
|
||||
│ │ ├── models.py # BookData, Prof, Apparat, Semester, etc.
|
||||
│ │ ├── constants.py # Application constants
|
||||
│ │ └── semester.py # Semester handling logic
|
||||
│ ├── database/ # Data persistence layer
|
||||
│ │ ├── connection.py # Database class & operations
|
||||
│ │ ├── schemas.py # SQL schema definitions
|
||||
│ │ └── migrations/ # SQL migration files
|
||||
│ ├── services/ # External API integrations
|
||||
│ │ ├── catalogue.py # RDS catalog scraping
|
||||
│ │ ├── sru.py # SWB/DNB library API client
|
||||
│ │ ├── lehmanns.py # Lehmanns bookstore API
|
||||
│ │ ├── zotero.py # Zotero integration
|
||||
│ │ ├── webadis.py # WebADIS automation
|
||||
│ │ └── openai.py # OpenAI API integration
|
||||
│ ├── parsers/ # Document & file parsing
|
||||
│ │ ├── csv_parser.py # CSV parsing
|
||||
│ │ ├── word_parser.py # Word document parsing
|
||||
│ │ ├── pdf_parser.py # PDF text extraction
|
||||
│ │ ├── xml_parser.py # XML parsing
|
||||
│ │ └── transformers/ # Bibliography format conversion
|
||||
│ ├── documents/ # Document generation
|
||||
│ │ └── generators.py # Word/PDF document creation
|
||||
│ ├── background/ # Background tasks & threading
|
||||
│ │ ├── autoadder.py # Automatic book addition
|
||||
│ │ ├── availability_checker.py # Catalog availability
|
||||
│ │ ├── book_grabber.py # Catalog metadata retrieval
|
||||
│ │ └── new_editions.py # New edition detection
|
||||
│ ├── ui/ # User interface components
|
||||
│ │ ├── userInterface.py # Main application window
|
||||
│ │ ├── dialogs/ # Dialog windows
|
||||
│ │ └── widgets/ # Reusable UI widgets
|
||||
│ ├── admin/ # Administrative functions
|
||||
│ │ └── commands.py # Admin CLI commands
|
||||
│ ├── utils/ # Utility functions
|
||||
│ │ ├── files.py # File operations
|
||||
│ │ ├── sorting.py # Custom sorting logic
|
||||
│ │ └── blob.py # Binary data handling
|
||||
│ ├── shared/ # Cross-cutting concerns
|
||||
│ │ ├── logging.py # Centralized logging
|
||||
│ │ └── config.py # Configuration management
|
||||
│ └── errors/ # Custom exceptions
|
||||
│ └── database.py # Database-specific errors
|
||||
├── tests/ # Test suite
|
||||
├── docs/ # Documentation
|
||||
├── mail_vorlagen/ # Email templates
|
||||
├── config.yaml # Application configuration
|
||||
├── main.py # Application entry point
|
||||
└── README.md
|
||||
```
|
||||
|
||||
### Architecture Principles
|
||||
|
||||
**Layered Architecture**:
|
||||
```
|
||||
UI Layer (PySide6 Qt Widgets)
|
||||
↓
|
||||
Background Tasks (QThread workers)
|
||||
↓
|
||||
Business Logic (Core models & operations)
|
||||
↓
|
||||
Services Layer (External API integrations)
|
||||
↓
|
||||
Data Access Layer (Database & file operations)
|
||||
```
|
||||
|
||||
**Key Design Patterns**:
|
||||
- **Repository Pattern**: Database class abstracts data persistence
|
||||
- **Service Layer**: External integrations isolated in `services/`
|
||||
- **Observer Pattern**: Qt signals/slots for event-driven updates
|
||||
- **Factory Pattern**: Document and citation generators
|
||||
- **Strategy Pattern**: Multiple parsing strategies for different file formats
|
||||
|
||||
## 🚀 Installation
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.10 or higher
|
||||
- [uv](https://github.com/astral-sh/uv) - Fast Python package installer and resolver (recommended)
|
||||
```bash
|
||||
# Install uv (Windows PowerShell)
|
||||
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
|
||||
|
||||
# Or using pip
|
||||
pip install uv
|
||||
```
|
||||
|
||||
### Setup Steps (Using uv - Recommended)
|
||||
|
||||
1. **Clone the repository**:
|
||||
```bash
|
||||
git clone https://github.com/yourusername/SemesterapparatsManager.git
|
||||
cd SemesterapparatsManager
|
||||
```
|
||||
|
||||
2. **Create virtual environment and install dependencies**:
|
||||
```bash
|
||||
# uv automatically creates venv and installs dependencies
|
||||
uv sync
|
||||
```
|
||||
|
||||
3. **Configure application**:
|
||||
- First launch will present a setup wizard
|
||||
- Configure database path, temp directory, and API keys
|
||||
- Create admin user account
|
||||
|
||||
4. **Run the application**:
|
||||
```bash
|
||||
uv run python main.py
|
||||
```
|
||||
|
||||
### Alternative Setup (Using pip/venv)
|
||||
|
||||
<details>
|
||||
<summary>Click to expand traditional pip installation steps</summary>
|
||||
|
||||
1. **Create virtual environment**:
|
||||
```bash
|
||||
python -m venv .venv
|
||||
```
|
||||
|
||||
2. **Activate virtual environment**:
|
||||
- Windows (PowerShell):
|
||||
```powershell
|
||||
.venv\Scripts\Activate.ps1
|
||||
```
|
||||
- Linux/Mac:
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
3. **Install dependencies**:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Run the application**:
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Building Executable
|
||||
|
||||
To build a standalone executable:
|
||||
|
||||
```bash
|
||||
# Using uv
|
||||
uv run pyinstaller --noconfirm --onedir --windowed \
|
||||
--icon='icons/app.ico' \
|
||||
--name='SemesterapparatsManager' \
|
||||
--clean \
|
||||
--add-data='config.yaml;.' \
|
||||
--add-data='icons;icons' \
|
||||
main.py
|
||||
```
|
||||
|
||||
Or use the provided build task (see `pyproject.toml`).
|
||||
|
||||
## 📖 Usage
|
||||
|
||||
### First Time Setup
|
||||
|
||||
1. **Launch Application**: Run `python main.py`
|
||||
2. **Setup Wizard**: Configure basic settings
|
||||
- Database location
|
||||
- Temporary files directory
|
||||
- Library catalog credentials (optional)
|
||||
- API keys (Zotero, OpenAI - optional)
|
||||
3. **Create Admin User**: Set up your admin credentials
|
||||
4. **Login**: Use your credentials to access the main interface
|
||||
|
||||
### Creating a Semester Apparatus
|
||||
|
||||
1. **Navigate**: Main window → "Neuer Apparat" (New Apparatus)
|
||||
2. **Fill Details**:
|
||||
- Semester (e.g., WiSe 2024/25)
|
||||
- Professor information
|
||||
- Course subject
|
||||
- Apparatus number
|
||||
3. **Add Books**: Click "Buch hinzufügen" (Add Book)
|
||||
- Enter signature or search by title
|
||||
- System fetches metadata from catalog
|
||||
- Add multiple books as needed
|
||||
4. **Add Media**: Click "Medium hinzufügen" (Add Media)
|
||||
- DVDs, CDs, or other media types
|
||||
5. **Save**: Confirm and create the apparatus
|
||||
6. **Generate Email**: Optionally send notification to professor
|
||||
|
||||
### Managing Digital Collections (ELSA)
|
||||
|
||||
1. **Upload Form**: Submit Word/PDF form with book chapter information
|
||||
### Setting Up Development Environment
|
||||
|
||||
1. **Install all dependencies** (including dev dependencies):
|
||||
```bash
|
||||
# Using uv (recommended)
|
||||
uv sync --all-extras
|
||||
|
||||
# Or using pip
|
||||
pip install -r requirements-dev.txt
|
||||
```
|
||||
|
||||
2. **Enable logging**:
|
||||
```python
|
||||
from src.shared.logging import configure
|
||||
configure("DEBUG") # In main.py
|
||||
```
|
||||
|
||||
3. **Run tests**:
|
||||
```bash
|
||||
# Using uv
|
||||
uv run pytest tests/
|
||||
|
||||
# Or with activated venv
|
||||
pytest tests/
|
||||
```

### Administrative Tasks
|
||||
|
||||
- **User Management**: Admin → Users → Create/Edit/Delete
|
||||
- **Professor Database**: Admin → Professors → Manage contacts
|
||||
- **System Settings**: Edit → Settings → Configure paths and APIs
|
||||
- **Database Maintenance**: Admin → Database → Run migrations
|
||||
|
||||
## 🛠️ Development
|
||||
|
||||
### Setting Up Development Environment
|
||||
|
||||
1. **Install dev dependencies**:
|
||||
```bash
|
||||
pip install -r requirements-dev.txt
|
||||
```
|
||||
|
||||
2. **Enable logging**:
|
||||
```python
|
||||
from src.shared.logging import configure
|
||||
configure("DEBUG") # In main.py
|
||||
```
|
||||
|
||||
3. **Run tests**:
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
### Project Standards
|
||||
|
||||
- **Code Style**: Follow PEP 8
|
||||
- **Type Hints**: Use type annotations where possible
|
||||
- **Docstrings**: Google-style docstrings for all public functions
|
||||
- **Logging**: Use centralized logger from `src.shared.logging`
|
||||
- **Imports**: Use new structure (see MIGRATION_GUIDE.md)
|
||||
|
||||
### Database Migrations
|
||||
|
||||
To create a new migration:
|
||||
|
||||
1. Create file: `src/database/migrations/V###__description.sql`
|
||||
2. Use sequential numbering (V001, V002, etc.)
|
||||
3. Write idempotent SQL (use `IF NOT EXISTS`)
|
||||
4. Test migration on copy of production database
|
||||
|
||||
Example:
|
||||
```sql
|
||||
-- V003__add_user_preferences.sql
|
||||
CREATE TABLE IF NOT EXISTS user_preferences (
|
||||
user_id INTEGER PRIMARY KEY,
|
||||
theme TEXT DEFAULT 'light',
|
||||
language TEXT DEFAULT 'de',
|
||||
FOREIGN KEY (user_id) REFERENCES user(id)
|
||||
);
|
||||
```
|
||||
|
||||
### Adding New Features
|
||||
|
||||
**New Service Integration**:
|
||||
1. Create module in `src/services/`
|
||||
2. Implement client class with proper error handling
|
||||
3. Add to `src/services/__init__.py`
|
||||
4. Document API requirements
|
||||
|
||||
**New Document Parser**:
|
||||
1. Create module in `src/parsers/`
|
||||
2. Implement parsing function returning core models
|
||||
3. Add to `src/parsers/__init__.py`
|
||||
4. Write unit tests
|
||||
|
||||
**New UI Dialog**:
|
||||
1. Design in Qt Designer (`.ui` file)
|
||||
2. Convert: `pyside6-uic dialog.ui -o dialog_ui.py`
|
||||
3. Create dialog class in `src/ui/dialogs/`
|
||||
4. Connect signals to business logic
|
||||
### Building Documentation
|
||||
|
||||
```bash
|
||||
# Using uv
|
||||
uv run mkdocs build
|
||||
uv run mkdocs serve # View at http://localhost:8000
|
||||
|
||||
# Or with activated venv
|
||||
mkdocs build
|
||||
mkdocs serve
|
||||
```

- **[API Documentation](docs/)**: Detailed module documentation
|
||||
- **[User Manual](docs/index.md)**: Complete user guide (MkDocs)
|
||||
|
||||
### Building Documentation
|
||||
|
||||
```bash
|
||||
mkdocs build
|
||||
mkdocs serve # View at http://localhost:8000
|
||||
```
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
Contributions are welcome! Please follow these guidelines:
|
||||
|
||||
1. **Fork** the repository
|
||||
2. **Create** a feature branch (`git checkout -b feature/amazing-feature`)
|
||||
3. **Commit** your changes (`git commit -m 'Add amazing feature'`)
|
||||
4. **Push** to the branch (`git push origin feature/amazing-feature`)
|
||||
5. **Open** a Pull Request
|
||||
|
||||
### Code Review Checklist
|
||||
|
||||
- [ ] Code follows project style guidelines
|
||||
- [ ] All tests pass
|
||||
- [ ] New features have tests
|
||||
- [ ] Documentation is updated
|
||||
- [ ] No sensitive data in commits
|
||||
- [ ] Import paths use new structure
|
||||
|
||||
## 📄 License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||
|
||||
## 🙏 Acknowledgments
|
||||
|
||||
- **University of Education Freiburg**: Project sponsor and primary user
|
||||
- **Qt/PySide6**: Excellent cross-platform GUI framework
|
||||
- **Zotero**: Citation management integration
|
||||
- **SWB/DNB**: Library catalog services
|
||||
|
||||
## 📞 Support
|
||||
|
||||
For questions, issues, or feature requests:
|
||||
- **Issues**: [Gitea Issues](https://git.theprivateserver.de/PHB/SemesterapparatsManager/issues)
|
||||
- **Email**: alexander.kirchner@ph-freiburg.de
|
||||
- **Documentation**: [Read the Docs](https://semesterapparatsmanager.readthedocs.io)
|
||||
|
||||
## 🗺️ Roadmap
|
||||
|
||||
TBD
|
||||
---
|
||||
|
||||
**Built with ❤️ for academic libraries**
|
||||
@@ -50,6 +50,7 @@ class Mail:
|
||||
smtp_server: str
|
||||
port: int
|
||||
sender: str
|
||||
sender_name: str
|
||||
password: str
|
||||
use_user_name: bool
|
||||
printer_mail: str
|
||||
|
||||
1
icons/manage_search.svg
Normal file
1
icons/manage_search.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="M80-200v-80h400v80H80Zm0-200v-80h200v80H80Zm0-200v-80h200v80H80Zm744 400L670-354q-24 17-52.5 25.5T560-320q-83 0-141.5-58.5T360-520q0-83 58.5-141.5T560-720q83 0 141.5 58.5T760-520q0 29-8.5 57.5T726-410l154 154-56 56ZM560-400q50 0 85-35t35-85q0-50-35-85t-85-35q-50 0-85 35t-35 85q0 50 35 85t85 35Z"/></svg>
|
||||
|
After Width: | Height: | Size: 420 B |
1
icons/search_results.svg
Normal file
1
icons/search_results.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="M400-320q100 0 170-70t70-170q0-100-70-170t-170-70q-100 0-170 70t-70 170q0 100 70 170t170 70Zm-40-120v-280h80v280h-80Zm-140 0v-200h80v200h-80Zm280 0v-160h80v160h-80ZM824-80 597-307q-41 32-91 49.5T400-240q-134 0-227-93T80-560q0-134 93-227t227-93q134 0 227 93t93 227q0 56-17.5 106T653-363l227 227-56 56Z"/></svg>
|
||||
|
After Width: | Height: | Size: 425 B |
1
icons/trash.svg
Normal file
1
icons/trash.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="M280-120q-33 0-56.5-23.5T200-200v-520h-40v-80h200v-40h240v40h200v80h-40v520q0 33-23.5 56.5T680-120H280Zm400-600H280v520h400v-520ZM360-280h80v-360h-80v360Zm160 0h80v-360h-80v360ZM280-720v520-520Z"/></svg>
|
||||
|
After Width: | Height: | Size: 319 B |
@@ -0,0 +1,13 @@
|
||||
|
||||
Subject: Bitte um Bestellung von Neuerwerbungen für Semesterapparat {AppNr} - {AppName}
|
||||
|
||||
|
||||
Hallo zusammen,
|
||||
|
||||
für den Semesterapparat {AppNr} - {Appname} wurden folgende Neuauflagen gefunden:
|
||||
|
||||
{newEditionsOrdered}
|
||||
|
||||
Wäre es möglich, diese, oder neuere Auflagen (wenn vorhanden), zu bestellen?
|
||||
|
||||
{signature}
|
||||
@@ -1,54 +0,0 @@
|
||||
Message-ID: <987b46cf-2d8b-4a27-acb3-c50f61d3d85d@ph-freiburg.de>
|
||||
Date: Tue, 31 Oct 2023 11:38:34 +0100
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla Thunderbird
|
||||
From: Alexander Kirchner <alexander.kirchner@ph-freiburg.de>
|
||||
Subject: =?UTF-8?Q?Information_bez=C3=BCglich_der_Aufl=C3=B6sung_des_Semeste?=
|
||||
=?UTF-8?Q?rapparates_=7BAppNr=7D?=
|
||||
Content-Language: de-DE
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=0
|
||||
X-Identity-Key: id1
|
||||
Fcc: imap://aky547@imap.ph-freiburg.de/INBOX/Sent
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>Sehr geehrte/r {Profname}, <br>
|
||||
</p>
|
||||
<p><br>
|
||||
</p>
|
||||
auf die E-Mail bezüglich der Auflösung oder Verlängerung der
|
||||
Semesterapparate haben wir von Ihnen keine Rückmeldung erhalten.
|
||||
Deshalb gehen wir davon aus, dass der Apparat aufgelöst werden kann.
|
||||
Die Medien, die im Apparat aufgestellt waren, werden nun wieder
|
||||
regulär ausleihbar und sind dann an ihren Standorten bei den Fächern
|
||||
zu finden. <br>
|
||||
<br>
|
||||
Falls Sie den Apparat erneut, oder einen neuen Apparat anlegen
|
||||
wollen, können Sie mir das ausgefüllte Formular zur Einrichtung des
|
||||
Apparates (<a class="moz-txt-link-freetext"
|
||||
href="https://www.ph-freiburg.de/bibliothek/lernen/semesterapparate/info-lehrende-sem.html">https://www.ph-freiburg.de/bibliothek/lernen/semesterapparate/info-lehrende-sem.html</a>)
|
||||
zukommen lassen. Im Falle einer Verlängerung des Apparates reicht
|
||||
eine Antwort auf diese Mail.
|
||||
<p><br>
|
||||
</p>
|
||||
<p>Bei Fragen können Sie sich jederzeit an mich wenden.<br>
|
||||
</p>
|
||||
<p><br>
|
||||
</p>
|
||||
<pre class="moz-signature" cols="72">--
|
||||
Freundliche Grüße
|
||||
|
||||
Alexander Kirchner
|
||||
|
||||
|
||||
Bibliothek der Pädagogischen Hochschule Freiburg
|
||||
Tel. 0761/682-778</pre>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,37 +1,17 @@
|
||||
Message-ID: <b44248a9-025e-e86c-85d7-5949534f0ac4@ph-freiburg.de>
|
||||
Date: Mon, 17 Jul 2023 12:59:04 +0200
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.13.0
|
||||
Content-Language: de-DE
|
||||
From: {user_name} <{user_mail}>
|
||||
Subject: =?UTF-8?Q?Information_bez=c3=bcglich_der_Aufl=c3=b6sung_des_Semeste?=
|
||||
=?UTF-8?Q?rapparates_=7bAppNr=7d?=
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=0
|
||||
X-Identity-Key: id1
|
||||
Fcc: imap://aky547@imap.ph-freiburg.de/INBOX/Sent
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>{greeting}
|
||||
<br>
|
||||
<p>auf die E-Mail bezüglich der Auflösung oder Verlängerung der Semesterapparate haben wir von Ihnen keine Rückmeldung erhalten. Deshalb gehen wir davon aus, dass der Apparat aufgelöst werden kann.</p>
|
||||
<p> Die Medien, die in den Apparaten aufgestellt waren, werden nun wieder regulär ausleihbar und sind dann an ihren Standorten bei den Fächern zu finden.</p>
|
||||
<p></p>
|
||||
<p>Falls Sie den Apparat erneut, oder einen neuen Apparat anlegen wollen, können Sie mir das ausgefüllte Formular zur Einrichtung des Apparates (<a class="moz-txt-link-freetext" href="https://www.ph-freiburg.de/bibliothek/lernen/semesterapparate/info-lehrende-sem.html">https://www.ph-freiburg.de/bibliothek/lernen/semesterapparate/info-lehrende-sem.html</a>) zukommen lassen.</p>
|
||||
<p>Im Falle einer Verlängerung des Apparates reicht eine Antwort auf diese Mail.<br>
|
||||
</p>
|
||||
<p>Bei Fragen können Sie sich jederzeit an mich wenden.<br>
|
||||
</p>
|
||||
<p><br>
|
||||
</p>
|
||||
<pre class="moz-signature" cols="72">--
|
||||
{signature}
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
||||
Subject: Information bezüglich der Auflösung des Semesterapparates {AppNr}
|
||||
|
||||
|
||||
{greeting}
|
||||
|
||||
auf die E-Mail bezüglich der Auflösung oder Verlängerung der Semesterapparate haben wir von Ihnen keine Rückmeldung erhalten. Deshalb gehen wir davon aus, dass der Apparat aufgelöst werden kann.
|
||||
Die Medien, die in den Apparaten aufgestellt waren, werden nun wieder regulär ausleihbar und sind dann an ihren Standorten bei den Fächern zu finden.
|
||||
|
||||
Falls Sie den Apparat erneut, oder einen neuen Apparat anlegen wollen,
|
||||
können Sie mir das ausgefüllte Formular zur Einrichtung des Apparates
|
||||
https://www.ph-freiburg.de/bibliothek/lernen/semesterapparate/info-lehrende-sem.html
|
||||
zukommen lassen. Im Falle einer Verlängerung des Apparates reicht eine Antwort auf diese Mail.
|
||||
|
||||
Bei Fragen können Sie sich jederzeit an mich wenden.
|
||||
|
||||
{signature}
|
||||
@@ -1,36 +1,16 @@
|
||||
Message-ID: <db617c48-29d6-d3d8-a67c-e9a6cf9b5bdb@ph-freiburg.de>
|
||||
Date: Tue, 12 Sep 2023 13:01:35 +0200
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.15.0
|
||||
From: Alexander Kirchner <alexander.kirchner@ph-freiburg.de>
|
||||
Subject: Information zum Semesterapparat {AppNr} - {Appname}
|
||||
Content-Language: de-DE
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=0
|
||||
X-Identity-Key: id1
|
||||
Fcc: imap://aky547@imap.ph-freiburg.de/INBOX/Sent
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>{greeting}
|
||||
<br>
|
||||
<p>Ihr Semesterapparat {Appname} wurde angelegt.</p>
|
||||
<p>Unter folgendem Link können Sie die Apparate einsehen:</p>
|
||||
<p><a class="moz-txt-link-freetext" href="https://bsz.ibs-bw.de/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SOPAC42&sp=SWI00000002&noRedir">https://bsz.ibs-bw.de/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SOPAC42&sp=SWI00000002&noRedir</a></p>
|
||||
<p>Ihr Apparat ist unter {AppSubject} > {Profname} > {AppNr} {Appname}.<br>
|
||||
</p>
|
||||
<p><br>
|
||||
</p>
|
||||
<p>Noch nicht vorhandene Medien wurden vorgemerkt und werden nach Rückkehr in die Bibliothek eingearbeitet.</p>
|
||||
<p>Bei Fragen können Sie sich per Mail bei mir melden.<br>
|
||||
</p>
|
||||
<pre class="moz-signature" cols="72">--
|
||||
{signature}
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
||||
Subject: Information zum Semesterapparat {AppNr} - {AppName}
|
||||
|
||||
|
||||
{greeting}
|
||||
|
||||
Ihr Semesterapparat {Appname} wurde angelegt.
|
||||
Unter folgendem Link können Sie die Apparate einsehen:
|
||||
https://bsz.ibs-bw.de/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SOPAC42&sp=SWI00000002&noRedir
|
||||
|
||||
Ihr Apparat ist unter {AppSubject} > {Profname} > {AppNr} {Appname}
|
||||
|
||||
Noch nicht vorhandene Medien wurden vorgemerkt und werden nach Rückkehr in die Bibliothek eingearbeitet.
|
||||
Bei Fragen können Sie sich per Mail bei mir melden.
|
||||
|
||||
{signature}
|
||||
@@ -0,0 +1,10 @@
|
||||
|
||||
Subject: Information zur Auflösung des Semesterapparates {AppNr} - {Appname}
|
||||
|
||||
|
||||
{greeting}
|
||||
|
||||
Ihr Semesterapparat "{Appname} ({AppNr})" wurde wie besprochen aufgelöst.
|
||||
Die Medien sind von nun an wieder in den Regalen zu finden.
|
||||
|
||||
{signature}
|
||||
@@ -1,18 +0,0 @@
|
||||
Subject: Information zur Auflösung des Semesterapparates {AppNr} - {Appname}
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/html; charset="UTF-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body style=" font-family:'Segoe UI'; font-size:9pt; font-weight:400; font-style:normal;">
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">{greeting}</p>
|
||||
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Ihr Semesterapparat "{Appname} ({AppNr})" wurde wie besprochen aufgelöst. </p>
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Die Medien sind von nun an wieder in den Regalen zu finden.</p>
|
||||
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><pre class="moz-signature" cols="72">-- </p>
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">{signature}</p>
|
||||
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"></pre></p></body></html>
|
||||
@@ -0,0 +1,15 @@
|
||||
|
||||
Subject: Neuauflagen für Semesterapparat {AppNr} - {AppName}
|
||||
|
||||
|
||||
{greeting}
|
||||
|
||||
Für Ihren Semesterapparat {AppNr} - {Appname} wurden folgende Neuauflagen gefunden:
|
||||
|
||||
{newEditions}
|
||||
|
||||
Sollen wir die alte(n) Auflage(n) aus dem Apparat durch diese austauschen?
|
||||
Nicht vorhandene Exemplare werden an die Erwerbungsabteilung weitergegeben
|
||||
und nach Erhalt der Medien in den Apparat eingearbeitet.
|
||||
|
||||
{signature}
|
||||
9
mail_vorlagen/blankomail.eml
Normal file
9
mail_vorlagen/blankomail.eml
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
Subject: CHANGEME
|
||||
|
||||
|
||||
{greeting}
|
||||
|
||||
|
||||
|
||||
{signature}
|
||||
13
main.py
13
main.py
@@ -1,11 +1,14 @@
|
||||
from src import first_launch, settings
|
||||
from src.ui.widgets.welcome_wizard import launch_wizard as startup
|
||||
from PySide6 import QtWidgets
|
||||
import sys
|
||||
from src.ui.userInterface import launch_gui as UI
|
||||
|
||||
from PySide6 import QtWidgets
|
||||
|
||||
from src import first_launch, settings
|
||||
from src.shared.logging import configure
|
||||
from src.ui.userInterface import launch_gui as UI
|
||||
from src.ui.widgets.welcome_wizard import launch_wizard as startup
|
||||
|
||||
if __name__ == "__main__":
|
||||
configure("INFO")
|
||||
app = QtWidgets.QApplication(sys.argv)
|
||||
if not first_launch:
|
||||
setup = startup()
|
||||
@@ -16,4 +19,4 @@ if __name__ == "__main__":
|
||||
else:
|
||||
sys.exit()
|
||||
else:
|
||||
UI()
|
||||
UI()
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
[project]
|
||||
name = "semesterapparatsmanager"
|
||||
version = "1.0.0"
|
||||
version = "1.0.2"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"appdirs>=1.4.4",
|
||||
"beautifulsoup4>=4.12.3",
|
||||
"beautifulsoup4>=4.13.5",
|
||||
"bump-my-version>=0.29.0",
|
||||
"chardet>=5.2.0",
|
||||
"charset-normalizer>=3.4.3",
|
||||
"comtypes>=1.4.9",
|
||||
"darkdetect>=0.8.0",
|
||||
"docx2pdf>=0.1.8",
|
||||
"httpx>=0.28.1",
|
||||
"loguru>=0.7.3",
|
||||
"mkdocs>=1.6.1",
|
||||
"mkdocs-material>=9.5.49",
|
||||
@@ -20,6 +21,7 @@ dependencies = [
|
||||
"omegaconf>=2.3.0",
|
||||
"openai>=1.79.0",
|
||||
"pandas>=2.2.3",
|
||||
"pdfquery>=0.4.3",
|
||||
"playwright>=1.49.1",
|
||||
"pyramid>=2.0.2",
|
||||
"pyside6>=6.9.1",
|
||||
@@ -30,14 +32,11 @@ dependencies = [
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"bump-my-version>=0.29.0",
|
||||
"icecream>=2.1.4",
|
||||
"nuitka>=2.5.9",
|
||||
]
|
||||
dev = ["bump-my-version>=0.29.0", "icecream>=2.1.4", "nuitka>=2.5.9"]
|
||||
swbtest = ["alive-progress>=3.3.0"]
|
||||
|
||||
[tool.bumpversion]
|
||||
current_version = "1.0.0"
|
||||
current_version = "1.0.2"
|
||||
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
||||
serialize = ["{major}.{minor}.{patch}"]
|
||||
search = "{current_version}"
|
||||
|
||||
@@ -1,32 +1,33 @@
|
||||
__version__ = "1.0.0"
|
||||
__version__ = "1.0.2"
|
||||
__author__ = "Alexander Kirchner"
|
||||
__all__ = ["__version__", "__author__", "Icon", "settings"]
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from appdirs import AppDirs
|
||||
|
||||
from config import Config
|
||||
|
||||
|
||||
app = AppDirs("SemesterApparatsManager", "SAM")
|
||||
LOG_DIR = app.user_log_dir
|
||||
CONFIG_DIR = app.user_config_dir
|
||||
if not os.path.exists(LOG_DIR):
|
||||
os.makedirs(LOG_DIR)
|
||||
if not os.path.exists(CONFIG_DIR):
|
||||
os.makedirs(CONFIG_DIR)
|
||||
LOG_DIR: str = app.user_log_dir # type: ignore
|
||||
CONFIG_DIR: str = app.user_config_dir # type: ignore
|
||||
if not os.path.exists(LOG_DIR): # type: ignore
|
||||
os.makedirs(LOG_DIR) # type: ignore
|
||||
if not os.path.exists(CONFIG_DIR): # type: ignore
|
||||
os.makedirs(CONFIG_DIR) # type: ignore
|
||||
|
||||
|
||||
settings = Config(f"{CONFIG_DIR}/config.yaml")
|
||||
DATABASE_DIR = (
|
||||
app.user_config_dir if settings.database.path is None else settings.database.path
|
||||
DATABASE_DIR: Union[Path, str] = ( # type: ignore
|
||||
app.user_config_dir if settings.database.path is None else settings.database.path # type: ignore
|
||||
)
|
||||
if not os.path.exists(DATABASE_DIR):
|
||||
os.makedirs(DATABASE_DIR)
|
||||
if not os.path.exists(DATABASE_DIR): # type: ignore
|
||||
os.makedirs(DATABASE_DIR) # type: ignore
|
||||
first_launch = settings.exists
|
||||
if not os.path.exists(settings.database.temp.expanduser()):
|
||||
settings.database.temp.expanduser().mkdir(parents=True, exist_ok=True)
|
||||
if not os.path.exists(settings.database.temp.expanduser()): # type: ignore
|
||||
settings.database.temp.expanduser().mkdir(parents=True, exist_ok=True) # type: ignore
|
||||
from .utils.icon import Icon
|
||||
|
||||
if not os.path.exists("logs"):
|
||||
|
||||
5
src/admin/__init__.py
Normal file
5
src/admin/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Administrative functions and commands."""
|
||||
|
||||
from .commands import AdminCommands
|
||||
|
||||
__all__ = ["AdminCommands"]
|
||||
103
src/admin/commands.py
Normal file
103
src/admin/commands.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import hashlib
|
||||
import random
|
||||
|
||||
from src.database import Database
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
# change passwords for apparats, change passwords for users, list users, create and delete users etc
|
||||
# create a class that has all commands. for each command, create a function that does the thing
|
||||
class AdminCommands:
    """Basic admin commands for the admin console.

    The class creates, deletes and lists users and changes user passwords by
    delegating to the project's ``Database`` wrapper stored in ``self.db``.
    """

    # Characters a salt is drawn from (ASCII letters and digits) and its length.
    _SALT_ALPHABET = "abcdefghijklmnopqrstuvwxyzQWERTZUIOPLKJHGFDSAYXCVBNM0123456789"
    _SALT_LENGTH = 16

    def __init__(self, db_path=None):
        """Open the database connection used by all commands.

        Args:
            db_path: optional database location handed to ``Database``; when
                ``None`` the ``Database`` default is used.
        """
        if db_path is None:
            self.db = Database()
        else:
            self.db = Database(db_path=db_path)
        log.info("AdminCommands initialized with database connection.")
        log.debug("location: {}", self.db.db_path)

    def create_password(self, password: str) -> tuple[str, str]:
        """Create a hashed password and a fresh salt.

        Args:
            password (str): the plain-text password to be hashed.

        Returns:
            tuple[str, str]: ``(hashed_password, salt)``.
        """
        salt = self.create_salt()
        # NOTE(review): the salt is not mixed into the hash; callers only
        # prepend it to the stored value. Changing that would invalidate
        # already stored credentials, so it is left as is.
        hashed_password = self.hash_password(password)
        return (hashed_password, salt)

    def create_salt(self) -> str:
        """Generate a random 16 character salt.

        Returns:
            str: the salt, made of ASCII letters and digits.
        """
        # Salts are security relevant, so use the OS-backed CSPRNG instead of
        # the predictable Mersenne Twister behind the ``random`` module.
        import secrets

        return "".join(
            secrets.choice(self._SALT_ALPHABET) for _ in range(self._SALT_LENGTH)
        )

    def create_admin(self) -> None:
        """Create the ``admin`` user with the default password ``admin``.

        This is only used once, when the database is created.
        """
        hashed_password, salt = self.create_password("admin")
        self.db.createUser("admin", salt + hashed_password, "admin", salt)

    def create_user(self, username: str, password: str, role: str = "user") -> bool:
        """Create a new user in the database.

        Args:
            username (str): the username of the user to be created.
            password (str): the plain-text password of the user.
            role (str, optional): the role of the user. Defaults to "user".

        Returns:
            bool: the status reported by ``Database.createUser``.
        """
        hashed_password, salt = self.create_password(password)
        status = self.db.createUser(
            user=username, password=salt + hashed_password, role=role, salt=salt
        )
        return status

    def hash_password(self, password: str) -> str:
        """Hash a password using SHA256.

        Args:
            password (str): the password to be hashed.

        Returns:
            str: the hex digest of the UTF-8 encoded password.
        """
        return hashlib.sha256(password.encode("utf-8")).hexdigest()

    def list_users(self) -> list[tuple]:
        """List all available users in the database.

        Returns:
            list[tuple]: whatever ``Database.getUsers`` returns, one tuple per user.
        """
        return self.db.getUsers()

    def delete_user(self, username: str) -> None:
        """Delete a selected user from the database.

        Args:
            username (str): the username of the user to be deleted.
        """
        self.db.deleteUser(username)

    def change_password(self, username: str, password: str) -> None:
        """Change the password for a user.

        Args:
            username (str): username of the user to change the password for.
            password (str): the new, non-hashed password to change to.
        """
        hashed_password = self.hash_password(password)
        # NOTE(review): create_user stores ``salt + hash`` while only the bare
        # hash is passed here - confirm that Database.changePassword adds the
        # salt prefix itself.
        self.db.changePassword(username, hashed_password)
|
||||
@@ -1,8 +1,22 @@
|
||||
from .semester import Semester
|
||||
from .database import Database
|
||||
__all__ = [
|
||||
"AdminCommands",
|
||||
"AutoAdder",
|
||||
"AvailChecker",
|
||||
"BookGrabber",
|
||||
"Database",
|
||||
"DocumentationThread",
|
||||
"NewEditionCheckerThread",
|
||||
"recreateElsaFile",
|
||||
"recreateFile",
|
||||
"Catalogue",
|
||||
]
|
||||
|
||||
from .admin_console import AdminCommands
|
||||
from .thread_bookgrabber import BookGrabber
|
||||
from .threads_availchecker import AvailChecker
|
||||
from .threads_autoadder import AutoAdder
|
||||
from .catalogue import Catalogue
|
||||
from .create_file import recreateElsaFile, recreateFile
|
||||
from .database import Database
|
||||
from .documentation_thread import DocumentationThread
|
||||
from .create_file import recreateFile, recreateElsaFile
|
||||
from .thread_bookgrabber import BookGrabber
|
||||
from .thread_neweditions import NewEditionCheckerThread
|
||||
from .threads_autoadder import AutoAdder
|
||||
from .threads_availchecker import AvailChecker
|
||||
|
||||
292
src/backend/catalogue.py
Normal file
292
src/backend/catalogue.py
Normal file
@@ -0,0 +1,292 @@
|
||||
from typing import List
|
||||
|
||||
import regex
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.logic import BookData as Book
|
||||
from src.shared.logging import log
|
||||
|
||||
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
|
||||
|
||||
class Catalogue:
|
||||
def __init__(self, timeout=15):
    """Store the request timeout and verify that the network is reachable.

    Args:
        timeout: timeout in seconds passed to every ``requests.get`` call.

    Raises:
        ConnectionError: when ``check_connection`` does not report success.
    """
    self.timeout = timeout
    if self.check_connection():
        return
    log.error("No internet connection available.")
    raise ConnectionError("No internet connection available.")
|
||||
|
||||
def check_connection(self) -> bool:
    """Probe connectivity with a GET request to google.com.

    Returns:
        bool: ``True`` when the request answers with HTTP 200, ``False`` for
        any other status code or when the request itself fails.
    """
    # Only the request can raise, so keep the try body down to that call.
    try:
        response = requests.get("https://www.google.com", timeout=self.timeout)
    except requests.exceptions.RequestException as e:
        log.error(f"Could not connect to google.com: {e}")
        return False
    # Return an explicit bool instead of falling off the end with None.
    return response.status_code == 200
|
||||
|
||||
def search_book(self, searchterm: str):
    """Run a catalogue search and return the raw result page.

    Args:
        searchterm: text inserted into the ``URL`` search template.

    Returns:
        The response body as text.
    """
    query_url = URL.format(searchterm)
    return requests.get(query_url, timeout=self.timeout).text
|
||||
|
||||
def search(self, link: str):
    """Download *link* and return the response body as text.

    Args:
        link: the URL to fetch.
    """
    return requests.get(link, timeout=self.timeout).text
|
||||
|
||||
def get_book_links(self, searchterm: str) -> List[str]:
    """Collect the record URLs of all hits of a catalogue search.

    Args:
        searchterm: the term handed to ``search_book``.

    Returns:
        One URL per ``<a class="title getFull">`` anchor of the result page,
        built by prefixing the anchor's ``href`` with ``BASE``.
    """
    page = BeautifulSoup(self.search_book(searchterm), "html.parser")
    anchors = page.find_all("a", class_="title getFull")
    return [BASE + anchor["href"] for anchor in anchors]  # type: ignore
|
||||
|
||||
def get_book(self, searchterm: str):
|
||||
log.info(f"Searching for term: {searchterm}")
|
||||
|
||||
links = self.get_book_links(searchterm)
|
||||
print(links)
|
||||
for elink in links:
|
||||
result = self.search(elink)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
|
||||
# Optional (unchanged): title and ppn if you need them
|
||||
title_el = soup.find("div", class_="headline text")
|
||||
title = title_el.get_text(strip=True) if title_el else None
|
||||
|
||||
ppn_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
|
||||
)
|
||||
# in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
ppn = (
|
||||
ppn_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).get_text(strip=True)
|
||||
if ppn_el
|
||||
else None
|
||||
)
|
||||
|
||||
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
|
||||
edition_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
|
||||
)
|
||||
edition = (
|
||||
edition_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).get_text(strip=True)
|
||||
if edition_el
|
||||
else None
|
||||
)
|
||||
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
)
|
||||
author = None
|
||||
if authors:
|
||||
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
for link in links:
|
||||
author_names.append(link.text.strip())
|
||||
author = (
|
||||
";".join(author_names) if len(author_names) > 1 else author_names[0]
|
||||
)
|
||||
signature = None
|
||||
|
||||
panel = soup.select_one("div.panel-body")
|
||||
if panel:
|
||||
# Collect the RDS_* blocks in order, using the 'space' divs as separators
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
if "space" in classes:
|
||||
if cur:
|
||||
groups.append(cur)
|
||||
cur = {}
|
||||
continue
|
||||
|
||||
# Read the value from the corresponding panel cell
|
||||
val_el = node.select_one(".rds-dl-panel")
|
||||
val = (
|
||||
val_el.get_text(" ", strip=True)
|
||||
if val_el
|
||||
else node.get_text(" ", strip=True)
|
||||
)
|
||||
|
||||
if "RDS_SIGNATURE" in classes:
|
||||
cur["signature"] = val
|
||||
elif "RDS_STATUS" in classes:
|
||||
cur["status"] = val
|
||||
elif "RDS_LOCATION" in classes:
|
||||
cur["location"] = val
|
||||
|
||||
if cur: # append the last group if not followed by a space
|
||||
groups.append(cur)
|
||||
|
||||
# Find the signature for the entry whose location mentions "Semesterapparat"
|
||||
for g in groups:
|
||||
loc = g.get("location", "").lower()
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("-")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
else:
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
|
||||
def get(self, ppn: str) -> Book | None:
    """Download and parse the catalogue record page for ``ppn``.

    NOTE(review): this looks unfinished. The original comment says the
    intent is to derive title, people, edition, year, language, pages and
    ISBN from the PPN, but the visible body stops after parsing the page,
    so the call currently always returns ``None``.

    Args:
        ppn: Record identifier appended to the RDS index-record URL.

    Returns:
        ``None`` (implicitly); no ``Book`` is built yet.
    """
    record_url = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
    page = self.search(record_url)
    # Parsed but not used yet -- kept so the call sequence stays unchanged.
    soup = BeautifulSoup(page, "html.parser")
|
||||
|
||||
def get_ppn(self, searchterm: str) -> str | None:
|
||||
links = self.get_book_links(searchterm)
|
||||
ppn = None
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
print(link)
|
||||
ppn = link.split("/")[-1]
|
||||
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
|
||||
return ppn
|
||||
return ppn
|
||||
|
||||
def get_semesterapparat_number(self, searchterm: str) -> int:
    """Look up where the first located search hit is shelved.

    Each hit page is scanned for ``RDS_LOCATION`` blocks and only the first
    block of the first page that has one decides the result.

    NOTE(review): the indentation of the source was lost; this assumes the
    unconditional fallback ``return`` sits inside the location loop.

    Args:
        searchterm: Query passed to ``self.get_book_links``.

    Returns:
        The number following ``"Semesterapparat-"`` as an ``int`` when the
        block names one. Otherwise the last blank-line separated part of
        the block text is returned as a string, despite the ``int``
        annotation. ``0`` when no hit has a location block.
    """
    for hit in self.get_book_links(searchterm):
        # Search the hit page for class "col-xs-12 rds-dl RDS_LOCATION".
        soup = BeautifulSoup(self.search(hit), "html.parser")
        for location_el in soup.find_all(
            "div", class_="col-xs-12 rds-dl RDS_LOCATION"
        ):
            text = location_el.text
            apparat = regex.search(r"Semesterapparat-(\d+)", text)
            if apparat:
                return int(apparat.group(1))
            # "Handbibliothek-" and every other location end up here.
            return text.strip().split("\n\n")[-1].strip()
    return 0
|
||||
|
||||
def get_author(self, link: str) -> str:
    """Collect the author names listed on the record(s) found for an id.

    Args:
        link: Identifier searched as ``kid:<link>``.

    Returns:
        The names joined with ``"; "``, taken from the last hit page that
        has ``RDS_PERSON`` heading cells. ``None`` when no page has any,
        despite the ``str`` annotation.
    """
    author = None
    for hit in self.get_book_links(f"kid:{link}"):
        soup = BeautifulSoup(self.search(hit), "html.parser")
        # Heading cells that label a person entry.
        heads = soup.find_all(
            "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
        )
        if not heads:
            continue
        names = []
        for head in heads:
            # The names are the anchor texts of the value cell that follows
            # the heading cell.
            panel = head.find_next_sibling(
                "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
            )
            if panel:
                names.extend(anchor.text.strip() for anchor in panel.find_all("a"))
        author = "; ".join(names)
    return author
|
||||
|
||||
def get_signature(self, isbn: str):
    """Return the shelf signature of the first hit that lists holdings.

    The holdings panel is split into one group per copy, using the
    ``space`` divs as separators. The signature of the copy whose location
    mentions "Semesterapparat" is preferred; if no copy does, the
    signature of the first copy is returned.

    Previously both branches of the location check returned during the
    first loop iteration, so the first copy always won and the
    "Semesterapparat" preference never took effect.

    Args:
        isbn: Search term handed to ``self.get_book_links``.

    Returns:
        The signature text, or ``None`` when no hit exposes any holdings
        (or the chosen copy has no signature).
    """
    for link in self.get_book_links(f"{isbn}"):
        soup = BeautifulSoup(self.search(link), "html.parser")
        panel = soup.select_one("div.panel-body")
        if not panel:
            continue

        # Collect the RDS_* blocks in order, using the 'space' divs as separators.
        groups = []
        cur = {}
        for node in panel.select(
            "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
        ):
            classes = node.get("class", [])
            if "space" in classes:
                # Separator between entries.
                if cur:
                    groups.append(cur)
                    cur = {}
                continue

            # Read the value from the corresponding panel cell, falling back
            # to the whole block when it has no dedicated value cell.
            val_el = node.select_one(".rds-dl-panel")
            val = (
                val_el.get_text(" ", strip=True)
                if val_el
                else node.get_text(" ", strip=True)
            )

            if "RDS_SIGNATURE" in classes:
                cur["signature"] = val
            elif "RDS_STATUS" in classes:
                cur["status"] = val
            elif "RDS_LOCATION" in classes:
                cur["location"] = val

        if cur:  # append the last group if not followed by a space
            groups.append(cur)
        if not groups:
            continue

        # Prefer the copy that is shelved in a Semesterapparat.
        for group in groups:
            if "semesterapparat" in group.get("location", "").lower():
                return group.get("signature")
        # No such copy: keep the previous behaviour and use the first copy.
        return groups[0].get("signature")

    print("No signature found")
    return None
|
||||
|
||||
def in_library(self, ppn: str) -> bool:
    """Tell whether a ``kid:<ppn>`` search yields at least one hit.

    Args:
        ppn: Identifier to look up; ``None`` is answered with ``False``
            without searching.

    Returns:
        ``True`` if ``self.get_book_links`` returned one or more links.
    """
    if ppn is not None:
        hits = self.get_book_links(f"kid:{ppn}")
        return len(hits) > 0
    return False
|
||||
|
||||
def get_location(self, ppn: str) -> str | None:
|
||||
if ppn is None:
|
||||
return None
|
||||
link = self.get_book(f"{ppn}")
|
||||
if link is None:
|
||||
return None
|
||||
return link.library_location
|
||||
@@ -1,8 +1,8 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3 as sql
|
||||
import sys
|
||||
import tempfile
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
@@ -10,9 +10,7 @@ from string import ascii_lowercase as lower
|
||||
from string import digits, punctuation
|
||||
from typing import Any, List, Optional, Tuple, Union
|
||||
|
||||
import loguru
|
||||
|
||||
from src import LOG_DIR, settings, DATABASE_DIR
|
||||
from src import DATABASE_DIR, settings
|
||||
from src.backend.db import (
|
||||
CREATE_ELSA_FILES_TABLE,
|
||||
CREATE_ELSA_MEDIA_TABLE,
|
||||
@@ -21,6 +19,7 @@ from src.backend.db import (
|
||||
CREATE_TABLE_FILES,
|
||||
CREATE_TABLE_MEDIA,
|
||||
CREATE_TABLE_MESSAGES,
|
||||
CREATE_TABLE_NEWEDITIONS,
|
||||
CREATE_TABLE_PROF,
|
||||
CREATE_TABLE_SUBJECTS,
|
||||
CREATE_TABLE_USER,
|
||||
@@ -28,17 +27,10 @@ from src.backend.db import (
|
||||
from src.errors import AppPresentError, NoResultError
|
||||
from src.logic import ELSA, Apparat, ApparatData, BookData, Prof
|
||||
from src.logic.constants import SEMAP_MEDIA_ACCOUNTS
|
||||
from src.logic.semester import Semester
|
||||
from src.shared.logging import log
|
||||
from src.utils.blob import create_blob
|
||||
|
||||
from .semester import Semester
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
|
||||
|
||||
ascii_lowercase = lower + digits + punctuation
|
||||
|
||||
|
||||
@@ -68,11 +60,125 @@ class Database:
|
||||
self.db_path = db_path
|
||||
log.debug(f"Database path: {self.db_path}")
|
||||
self.db_initialized = False
|
||||
self.startup_check()
|
||||
|
||||
def startup_check(self):
    """Make sure every required table exists, recreating missing ones.

    Initializes the database first when ``self.db_initialized`` is unset,
    then compares the names reported by ``self.get_db_contents`` (second
    column of each row) with the required table names and calls
    ``self.create_table`` for each one that is absent.
    """
    if not self.db_initialized:
        self.initializeDatabase()

    rows = self.get_db_contents()
    existing = [] if rows is None else [row[1] for row in rows]

    required_tables = (
        "semesterapparat",
        "messages",
        "media",
        "files",
        "prof",
        "user",
        "subjects",
        "elsa",
        "elsa_files",
        "elsa_media",
        "neweditions",
    )
    missing = [name for name in required_tables if name not in existing]
    for name in missing:
        log.critical(f"Table {name} is missing, recreating...")
        self.create_table(name)
|
||||
|
||||
def create_table(self, table_name: str):
|
||||
match table_name:
|
||||
case "semesterapparat":
|
||||
query = CREATE_TABLE_APPARAT
|
||||
case "messages":
|
||||
query = CREATE_TABLE_MESSAGES
|
||||
case "media":
|
||||
query = CREATE_TABLE_MEDIA
|
||||
case "files":
|
||||
query = CREATE_TABLE_FILES
|
||||
case "prof":
|
||||
query = CREATE_TABLE_PROF
|
||||
case "user":
|
||||
query = CREATE_TABLE_USER
|
||||
case "subjects":
|
||||
query = CREATE_TABLE_SUBJECTS
|
||||
case "elsa":
|
||||
query = CREATE_ELSA_TABLE
|
||||
case "elsa_files":
|
||||
query = CREATE_ELSA_FILES_TABLE
|
||||
case "elsa_media":
|
||||
query = CREATE_ELSA_MEDIA_TABLE
|
||||
case "neweditions":
|
||||
query = CREATE_TABLE_NEWEDITIONS
|
||||
case _:
|
||||
log.error(f"Table {table_name} is not a valid table name")
|
||||
self.query_db(query)
|
||||
|
||||
def initializeDatabase(self):
    """Run the one-time database initialization, then apply migrations.

    Does nothing when ``self.db_initialized`` is already set. Otherwise it
    calls ``self.checkDatabaseStatus``, marks the database as initialized
    and, if a database path is configured, runs the migrations. Migration
    errors are logged and not propagated.

    NOTE(review): the indentation of the source was lost; this assumes the
    migration step belongs to the "not yet initialized" branch.
    """
    if self.db_initialized:
        return

    self.checkDatabaseStatus()
    self.db_initialized = True

    # Run migrations after initial creation to bring the schema up to date.
    try:
        if self.db_path is not None:
            self.run_migrations()
    except Exception as e:
        log.error(f"Error while running migrations: {e}")
|
||||
|
||||
# --- Migration helpers integrated into Database ---
|
||||
def _ensure_migrations_table(self, conn: sql.Connection) -> None:
    """Create the ``schema_migrations`` bookkeeping table if it is missing.

    Args:
        conn: Open SQLite connection; the change is committed on it.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS schema_migrations (
            id TEXT PRIMARY KEY,
            applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
        """
    conn.cursor().execute(ddl)
    conn.commit()
|
||||
|
||||
def _applied_migrations(self, conn: sql.Connection) -> List[str]:
    """List the ids recorded in ``schema_migrations``, ordered by id.

    Args:
        conn: Open SQLite connection that already has the table.

    Returns:
        The ``id`` column values in ascending order.
    """
    recorded = (
        conn.cursor()
        .execute("SELECT id FROM schema_migrations ORDER BY id")
        .fetchall()
    )
    return [row[0] for row in recorded]
|
||||
|
||||
def _apply_sql_file(self, conn: sql.Connection, path: Path) -> None:
    """Execute one migration script and record it as applied.

    Args:
        conn: Open SQLite connection to run the script on.
        path: Migration file, read as UTF-8; its file name is the id that
            is stored in ``schema_migrations``.
    """
    migration_id = path.name
    log.info(f"Applying migration {migration_id}")
    script = path.read_text(encoding="utf-8")

    cur = conn.cursor()
    cur.executescript(script)
    # INSERT OR REPLACE keeps re-running a migration from failing on the
    # primary key.
    cur.execute(
        "INSERT OR REPLACE INTO schema_migrations (id) VALUES (?)", (migration_id,)
    )
    conn.commit()
|
||||
|
||||
def run_migrations(self) -> None:
    """Apply every not-yet-applied ``.sql`` file from the migrations folder.

    The folder is ``migrations`` next to this module. Files are applied in
    sorted path order on a connection obtained from ``self.connect()``,
    which is closed afterwards even if a migration fails. Files whose name
    is already recorded in ``schema_migrations`` are skipped.
    """
    migrations_dir = Path(__file__).parent / "migrations"
    if not migrations_dir.exists():
        log.debug("Migrations directory does not exist, skipping migrations")
        return

    conn = self.connect()
    try:
        self._ensure_migrations_table(conn)
        already_applied = set(self._applied_migrations(conn))

        pending = sorted(
            entry for entry in migrations_dir.iterdir() if entry.suffix == ".sql"
        )
        for script in pending:
            if script.name in already_applied:
                log.debug(f"Skipping already applied migration {script.name}")
                continue
            self._apply_sql_file(conn, script)
    finally:
        conn.close()
|
||||
|
||||
# --- end migration helpers ---
|
||||
|
||||
def overwritePath(self, new_db_path: str):
|
||||
log.debug("got new path, overwriting")
|
||||
@@ -94,7 +200,7 @@ class Database:
|
||||
self.create_tables()
|
||||
self.insertSubjects()
|
||||
|
||||
def getElsaMediaID(self, work_author, signature, pages):
|
||||
def getElsaMediaID(self, work_author: str, signature: str, pages: str):
|
||||
query = (
|
||||
"SELECT id FROM elsa_media WHERE work_author=? AND signature=? AND pages=?"
|
||||
)
|
||||
@@ -110,7 +216,7 @@ class Database:
|
||||
query = "SELECT type FROM elsa_media WHERE id=?"
|
||||
return self.query_db(query, (id,), one=True)[0]
|
||||
|
||||
def get_db_contents(self) -> Union[List[Tuple], None]:
|
||||
def get_db_contents(self) -> Union[List[Tuple[Any]], None]:
|
||||
"""
|
||||
Get the contents of the
|
||||
|
||||
@@ -132,7 +238,13 @@ class Database:
|
||||
Returns:
|
||||
sql.Connection: The active connection to the database
|
||||
"""
|
||||
return sql.connect(self.db_path)
|
||||
conn = sql.connect(self.db_path)
|
||||
# Fast pragmas suitable for a desktop app DB
|
||||
conn.execute("PRAGMA journal_mode=WAL;")
|
||||
conn.execute("PRAGMA synchronous=NORMAL;")
|
||||
conn.execute("PRAGMA temp_store=MEMORY;")
|
||||
conn.execute("PRAGMA mmap_size=134217728;") # 128MB
|
||||
return conn
|
||||
|
||||
def close_connection(self, conn: sql.Connection):
|
||||
"""
|
||||
@@ -148,20 +260,10 @@ class Database:
|
||||
"""
|
||||
Create the tables in the database
|
||||
"""
|
||||
conn = self.connect()
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(CREATE_TABLE_APPARAT)
|
||||
cursor.execute(CREATE_TABLE_MESSAGES)
|
||||
cursor.execute(CREATE_TABLE_MEDIA)
|
||||
cursor.execute(CREATE_TABLE_FILES)
|
||||
cursor.execute(CREATE_TABLE_PROF)
|
||||
cursor.execute(CREATE_TABLE_USER)
|
||||
cursor.execute(CREATE_TABLE_SUBJECTS)
|
||||
cursor.execute(CREATE_ELSA_TABLE)
|
||||
cursor.execute(CREATE_ELSA_FILES_TABLE)
|
||||
cursor.execute(CREATE_ELSA_MEDIA_TABLE)
|
||||
conn.commit()
|
||||
self.close_connection(conn)
|
||||
# Bootstrapping of tables is handled via migrations. Run migrations instead
|
||||
# of executing the hard-coded DDL here. Migrations are idempotent and
|
||||
# contain the CREATE TABLE IF NOT EXISTS statements.
|
||||
self.run_migrations()
|
||||
|
||||
def insertInto(self, query: str, params: Tuple) -> None:
|
||||
"""
|
||||
@@ -173,16 +275,31 @@ class Database:
|
||||
"""
|
||||
conn = self.connect()
|
||||
cursor = conn.cursor()
|
||||
log.debug(f"Inserting {params} into database with query {query}")
|
||||
log.debug(f"Inserting into DB: {query}")
|
||||
cursor.execute(query, params)
|
||||
conn.commit()
|
||||
self.close_connection(conn)
|
||||
|
||||
def getWebADISAuth(self) -> Tuple[str, str]:
    """Read the WebADIS login stored for the ``'SAP'`` effective range.

    Returns:
        Tuple[str, str]: ``(username, password)`` from ``webadis_login``,
        or two empty strings when the query yields no row.
    """
    row = self.query_db(
        "SELECT username, password FROM webadis_login WHERE effective_range='SAP'",
        one=True,
    )
    if row is not None:
        return (row[0], row[1])
    return ("", "")
|
||||
|
||||
@log.catch
|
||||
def query_db(
|
||||
self,
|
||||
query: str,
|
||||
args: Tuple[Any, Any] = (), # type:ignore
|
||||
args: Tuple[Any] = (), # type:ignore
|
||||
one: bool = False, # type:ignore
|
||||
) -> Union[Tuple[Any, Any], List[Tuple[Any, Any]]]:
|
||||
"""
|
||||
@@ -201,12 +318,12 @@ class Database:
|
||||
logs_query = query
|
||||
|
||||
logs_args = args
|
||||
if "fileblob" in query:
|
||||
# set fileblob arg in logger to "too long"
|
||||
logs_query = query
|
||||
fileblob_location = query.find("fileblob")
|
||||
# remove fileblob from query
|
||||
logs_query = query[:fileblob_location] + "fileblob = too long"
|
||||
# if "fileblob" in query:
|
||||
# # set fileblob arg in logger to "too long"
|
||||
# logs_query = query
|
||||
# fileblob_location = query.find("fileblob")
|
||||
# # remove fileblob from query
|
||||
# logs_query = query[:fileblob_location] + "fileblob = too long"
|
||||
|
||||
log_message = f"Querying database with query {logs_query}, args: {logs_args}"
|
||||
# if "INSERT" in query:
|
||||
@@ -332,49 +449,66 @@ class Database:
|
||||
"""
|
||||
return self.query_db("SELECT id FROM media ORDER BY id DESC", one=True)[0]
|
||||
|
||||
def searchBook(self, data: dict[str, str]) -> list[tuple[BookData, int]]:
|
||||
def searchBook(
|
||||
self, data: dict[str, str]
|
||||
) -> Optional[list[tuple["BookData", int, int]]]:
|
||||
"""
|
||||
Search a book in the database based on the sent data.
|
||||
Search a book in the database using regex against signature/title.
|
||||
|
||||
Args:
|
||||
data (dict[str, str]): A dictionary containing the data to be searched for. The dictionary can contain the following:
|
||||
- signature: The signature of the book
|
||||
- title: The title of the book
|
||||
data: may contain:
|
||||
- "signature": regex to match against BookData.signature
|
||||
- "title": regex to match against BookData.title
|
||||
|
||||
Returns:
|
||||
list[tuple[BookData, int]]: A list of tuples containing the wrapped Metadata and the id of the book
|
||||
list of (BookData, app_id, prof_id) tuples, or None if invalid args
|
||||
"""
|
||||
rdata = self.query_db("SELECT * FROM media WHERE deleted=0")
|
||||
# log.debug(rdata, len(rdata))
|
||||
|
||||
# Determine mode (kept compatible with your original logic)
|
||||
mode = 0
|
||||
if len(data) == 1:
|
||||
if "signature" in data.keys():
|
||||
mode = 1
|
||||
elif "title" in data.keys():
|
||||
mode = 2
|
||||
elif len(data) == 2:
|
||||
if len(data) == 1 and "signature" in data:
|
||||
mode = 1
|
||||
elif len(data) == 1 and "title" in data:
|
||||
mode = 2
|
||||
elif len(data) == 2 and "signature" in data and "title" in data:
|
||||
mode = 3
|
||||
else:
|
||||
return None
|
||||
ret = []
|
||||
for book in rdata:
|
||||
bookdata = BookData().from_string(book[1])
|
||||
app_id = book[2]
|
||||
prof_id = book[3]
|
||||
|
||||
def _compile(expr: str) -> re.Pattern:
|
||||
try:
|
||||
return re.compile(expr, re.IGNORECASE | re.UNICODE)
|
||||
except re.error:
|
||||
# If user provided a broken regex, treat it as a literal
|
||||
return re.compile(re.escape(expr), re.IGNORECASE | re.UNICODE)
|
||||
|
||||
sig_re = _compile(data["signature"]) if mode in (1, 3) else None
|
||||
title_re = _compile(data["title"]) if mode in (2, 3) else None
|
||||
|
||||
# Fetch candidates once
|
||||
rows = self.query_db("SELECT * FROM media WHERE deleted=0")
|
||||
|
||||
results: list[tuple["BookData", int, int]] = []
|
||||
for row in rows:
|
||||
bookdata = BookData().from_string(
|
||||
row[1]
|
||||
) # assumes row[1] is the serialized bookdata
|
||||
app_id = row[2]
|
||||
prof_id = row[3]
|
||||
|
||||
sig_val = bookdata.signature
|
||||
title_val = bookdata.title
|
||||
if mode == 1:
|
||||
if data["signature"] in bookdata.signature:
|
||||
ret.append((bookdata, app_id, prof_id))
|
||||
if sig_re.search(sig_val):
|
||||
results.append((bookdata, app_id, prof_id))
|
||||
elif mode == 2:
|
||||
if data["title"] in bookdata.title:
|
||||
ret.append((bookdata, app_id, prof_id))
|
||||
elif mode == 3:
|
||||
if (
|
||||
data["signature"] in bookdata.signature
|
||||
and data["title"] in bookdata.title
|
||||
):
|
||||
ret.append((bookdata, app_id, prof_id))
|
||||
# log.debug(ret)
|
||||
return ret
|
||||
if title_re.search(title_val):
|
||||
results.append((bookdata, app_id, prof_id))
|
||||
else: # mode == 3
|
||||
if sig_re.search(sig_val) and title_re.search(title_val):
|
||||
results.append((bookdata, app_id, prof_id))
|
||||
|
||||
return results
|
||||
|
||||
def setAvailability(self, book_id: str, available: str):
|
||||
"""
|
||||
@@ -402,7 +536,7 @@ class Database:
|
||||
"""
|
||||
result = self.query_db(
|
||||
"SELECT id FROM media WHERE bookdata=? AND app_id=? AND prof_id=?",
|
||||
(dump_pickle(bookdata), app_id, prof_id),
|
||||
(bookdata.to_dict, app_id, prof_id),
|
||||
one=True,
|
||||
)
|
||||
return result[0]
|
||||
@@ -435,6 +569,7 @@ class Database:
|
||||
deleted (int, optional): The state of the book. Set to 1 to include deleted ones. Defaults to 0.
|
||||
|
||||
Returns:
|
||||
|
||||
list[dict[int, BookData, int]]: A list of dictionaries containing the id, the metadata of the book and the availability of the book
|
||||
"""
|
||||
qdata = self.query_db(
|
||||
@@ -451,6 +586,68 @@ class Database:
|
||||
ret_result.append(data)
|
||||
return ret_result
|
||||
|
||||
def getAllBooks(self) -> list[dict[str, Union[int, BookData]]]:
    """Return every media row that is not flagged as deleted.

    Returns
    -------
    list[dict[str, Union[int, BookData]]]
        One dict per row with the keys ``"id"`` (row id) and ``"bookdata"``
        (the stored value passed through ``BookData().from_string``).
        Empty when the query returns ``None``.
    """
    rows = self.query_db("SELECT id,bookdata FROM media WHERE deleted=0")
    if rows is None:
        return []
    return [
        {"id": row[0], "bookdata": BookData().from_string(row[1])} for row in rows
    ]
|
||||
|
||||
def getApparatNrByBookId(self, book_id):
    """Return the ``appnr`` of the apparat a media entry belongs to.

    Args:
        book_id: Id of the row in ``media``.

    Returns:
        The first column of the matching ``semesterapparat`` row, or
        ``None`` when the query returns nothing.
    """
    row = self.query_db(
        "SELECT appnr FROM semesterapparat WHERE id IN (SELECT app_id FROM media WHERE id=?)",
        (book_id,),
        one=True,
    )
    if not row:
        return None
    return row[0]
|
||||
|
||||
def getBooksByProfId(
    self, prof_id: int, deleted: int = 0
) -> list[dict[str, Union[int, BookData]]]:
    """Get the books that belong to a professor.

    ``prof_id`` is now bound as a query parameter instead of being
    formatted into the SQL text, so a crafted or malformed value can no
    longer change the statement.

    Parameters
    ----------
    prof_id : int
        The ID of the professor.
    deleted : int, optional
        ``0`` (default) returns only non-deleted books; any other value
        also includes deleted ones.

    Returns
    -------
    list[dict[str, Union[int, BookData]]]
        One dict per row with the keys ``"id"``, ``"bookdata"`` (parsed via
        ``BookData().from_string``) and ``"available"``. Empty when the
        query returns ``None``.
    """
    # Only fixed SQL fragments are spliced in; the value itself is bound.
    deleted_filter = "deleted=0" if deleted == 0 else "deleted=1 OR deleted=0"
    qdata = self.query_db(
        f"SELECT id,bookdata,available FROM media WHERE prof_id=? AND ({deleted_filter})",
        (prof_id,),
    )
    if qdata is None:
        return []
    return [
        {
            "id": row[0],
            "bookdata": BookData().from_string(row[1]),
            "available": row[2],
        }
        for row in qdata
    ]
|
||||
|
||||
def updateBookdata(self, book_id: int, bookdata: BookData):
|
||||
"""
|
||||
Update the bookdata in the database
|
||||
@@ -472,6 +669,16 @@ class Database:
|
||||
"""
|
||||
self.query_db("UPDATE media SET deleted=1 WHERE id=?", (book_id,))
|
||||
|
||||
def deleteBooks(self, ids: list[int]):
    """Flag several books as deleted with a single UPDATE.

    Args:
        ids (list[int]): Ids of the ``media`` rows to mark with
            ``deleted=1``.
    """
    # One "?" placeholder per id, comma separated.
    placeholders = ",".join("?" * len(ids))
    self.query_db(
        f"UPDATE media SET deleted=1 WHERE id IN ({placeholders})", tuple(ids)
    )
|
||||
|
||||
# File Interactions
|
||||
def getBlob(self, filename: str, app_id: Union[str, int]) -> bytes:
|
||||
"""
|
||||
@@ -525,11 +732,12 @@ class Database:
|
||||
str: The filename of the recreated file
|
||||
"""
|
||||
blob = self.getBlob(filename, app_id)
|
||||
log.debug(blob)
|
||||
tempdir = settings.database.temp.expanduser()
|
||||
if not tempdir.exists():
|
||||
tempdir.mkdir(parents=True, exist_ok=True)
|
||||
file = tempfile.NamedTemporaryFile(
|
||||
delete=False, dir=tempdir_path, mode="wb", suffix=f".{filetype}"
|
||||
delete=False, dir=tempdir, mode="wb", suffix=f".{filetype}"
|
||||
)
|
||||
file.write(blob)
|
||||
# log.debug("file created")
|
||||
@@ -701,6 +909,20 @@ class Database:
|
||||
else:
|
||||
return prof[0]
|
||||
|
||||
def getProfMailById(self, prof_id: Union[str, int]) -> str:
    """Get the mail address of a professor based on the id.

    Args:
        prof_id (Union[str,int]): the id of the professor

    Returns:
        str: the mail of the professor, or an empty string when the
        professor does not exist or has no mail stored.

    Previously the query result was subscripted before it was checked, so
    an unknown id raised ``TypeError`` instead of yielding ``""``.
    """
    row = self.query_db("SELECT mail FROM prof WHERE id=?", (prof_id,), one=True)
    if not row or row[0] is None:
        return ""
    return row[0]
|
||||
|
||||
def getTitleById(self, prof_id: Union[str, int]) -> str:
|
||||
"""get the title of a professor based on the id
|
||||
|
||||
@@ -877,6 +1099,23 @@ class Database:
|
||||
(newDate, today, app_id),
|
||||
)
|
||||
|
||||
def getId(self, apparat_name) -> Optional[int]:
    """Look up the row id of an apparat by its name.

    Args:
        apparat_name (str): the name of the apparat e.g. "Semesterapparat 1"

    Returns:
        Optional[int]: the id of the apparat, or None if no apparat with
        that name exists
    """
    row = self.query_db(
        "SELECT id FROM semesterapparat WHERE name=?", (apparat_name,), one=True
    )
    return row[0] if row is not None else None
|
||||
|
||||
def getApparatId(self, apparat_name) -> Optional[int]:
|
||||
"""get the id of an apparat based on the name
|
||||
|
||||
@@ -1014,22 +1253,22 @@ class Database:
|
||||
self.close_connection(conn)
|
||||
return ret
|
||||
|
||||
def deleteApparat(self, app_id: Union[str, int], semester):
|
||||
def deleteApparat(self, apparat: Apparat, semester: str):
|
||||
"""Delete an apparat from the database
|
||||
|
||||
Args:
|
||||
app_id (Union[str, int]): the id of the apparat
|
||||
apparat: (Apparat): the apparat to be deleted
|
||||
semester (str): the semester the apparat should be deleted from
|
||||
"""
|
||||
log.info(f"Deleting apparat with id {app_id} in semester {semester}")
|
||||
apparat_nr = apparat.appnr
|
||||
app_id = self.getId(apparat.name)
|
||||
self.query_db(
|
||||
"UPDATE semesterapparat SET deletion_status=1, deleted_date=? WHERE appnr=?",
|
||||
(semester, app_id),
|
||||
)
|
||||
self.query_db(
|
||||
"UPDATE media SET deleted=1 WHERE app_id=?",
|
||||
(app_id,),
|
||||
"UPDATE semesterapparat SET deletion_status=1, deleted_date=? WHERE appnr=? AND name=?",
|
||||
(semester, apparat_nr, apparat.name),
|
||||
)
|
||||
# delete all books associated with the app_id
|
||||
# print(apparat_nr, app_id)
|
||||
self.query_db("UPDATE media SET deleted=1 WHERE app_id=?", (app_id,))
|
||||
|
||||
def isEternal(self, id):
|
||||
"""check if the apparat is eternal (dauerapparat)
|
||||
@@ -1101,11 +1340,11 @@ class Database:
|
||||
else False
|
||||
)
|
||||
|
||||
def checkApparatExistsById(self, app_id: Union[str, int]) -> bool:
|
||||
"""a check to see if the apparat is already present in the database, based on the id
|
||||
def checkApparatExistsByNr(self, app_nr: Union[str, int]) -> bool:
|
||||
"""a check to see if the apparat is already present in the database, based on the nr. This query will exclude deleted apparats
|
||||
|
||||
Args:
|
||||
app_id (Union[str, int]): the id of the apparat
|
||||
app_nr (Union[str, int]): the id of the apparat
|
||||
|
||||
Returns:
|
||||
bool: True if the apparat is present, False if not
|
||||
@@ -1113,7 +1352,9 @@ class Database:
|
||||
return (
|
||||
True
|
||||
if self.query_db(
|
||||
"SELECT appnr FROM semesterapparat WHERE appnr=?", (app_id,), one=True
|
||||
"SELECT id FROM semesterapparat WHERE appnr=? and deletion_status=0",
|
||||
(app_nr,),
|
||||
one=True,
|
||||
)
|
||||
else False
|
||||
)
|
||||
@@ -1498,7 +1739,7 @@ class Database:
|
||||
tempdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
file = tempfile.NamedTemporaryFile(
|
||||
delete=False, dir=tempdir_path, mode="wb", suffix=f".{filetype}"
|
||||
delete=False, dir=tempdir, mode="wb", suffix=f".{filetype}"
|
||||
)
|
||||
file.write(blob)
|
||||
# log.debug("file created")
|
||||
@@ -1561,9 +1802,9 @@ class Database:
|
||||
telnr = profdata.telnr
|
||||
title = profdata.title
|
||||
|
||||
query = f"INSERT INTO prof (fname, lname, fullname, mail, telnr,titel) VALUES ('{fname}','{lname}','{fullname}','{mail}','{telnr}','{title}')"
|
||||
query = "INSERT INTO prof (fname, lname, fullname, mail, telnr, titel) VALUES (?,?,?,?,?,?)"
|
||||
log.debug(query)
|
||||
cursor.execute(query)
|
||||
cursor.execute(query, (fname, lname, fullname, mail, telnr, title))
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -1606,10 +1847,10 @@ class Database:
|
||||
fullname = profdata["profname"]
|
||||
else:
|
||||
fullname = profdata.name()
|
||||
query = f"SELECT id FROM prof WHERE fullname = '{fullname}'"
|
||||
query = "SELECT id FROM prof WHERE fullname = ?"
|
||||
log.debug(query)
|
||||
|
||||
cursor.execute(query)
|
||||
cursor.execute(query, (fullname,))
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
return result[0]
|
||||
@@ -1624,10 +1865,10 @@ class Database:
|
||||
"""
|
||||
conn = self.connect()
|
||||
cursor = conn.cursor()
|
||||
query = f"SELECT * FROM prof WHERE fullname = '{fullname}'"
|
||||
query = "SELECT * FROM prof WHERE fullname = ?"
|
||||
log.debug(query)
|
||||
|
||||
result = cursor.execute(query).fetchone()
|
||||
result = cursor.execute(query, (fullname,)).fetchone()
|
||||
if result:
|
||||
return Prof().from_tuple(result)
|
||||
else:
|
||||
@@ -1643,8 +1884,8 @@ class Database:
|
||||
|
||||
int | None: The id of the prof or None if not found
|
||||
"""
|
||||
query = f"SELECT prof_id from semesterapparat WHERE appnr = '{apprarat_id}' and deletion_status = 0"
|
||||
data = self.query_db(query)
|
||||
query = "SELECT prof_id from semesterapparat WHERE appnr = ? and deletion_status = 0"
|
||||
data = self.query_db(query, (apprarat_id,))
|
||||
if data:
|
||||
log.info("Prof ID: " + str(data[0][0]))
|
||||
return data[0][0]
|
||||
@@ -1655,20 +1896,13 @@ class Database:
|
||||
# get book data
|
||||
new_apparat_id = apparat
|
||||
new_prof_id = self.getProfIDByApparat(new_apparat_id)
|
||||
query = f"""
|
||||
INSERT INTO media (bookdata, app_id, prof_id, deleted, available, reservation)
|
||||
SELECT
|
||||
bookdata,
|
||||
'{new_apparat_id}',
|
||||
'{new_prof_id}',
|
||||
0,
|
||||
available,
|
||||
reservation
|
||||
FROM media
|
||||
where id = '{book_id}'"""
|
||||
query = (
|
||||
"INSERT INTO media (bookdata, app_id, prof_id, deleted, available, reservation) "
|
||||
"SELECT bookdata, ?, ?, 0, available, reservation FROM media WHERE id = ?"
|
||||
)
|
||||
connection = self.connect()
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(query)
|
||||
cursor.execute(query, (new_apparat_id, new_prof_id, book_id))
|
||||
connection.commit()
|
||||
connection.close()
|
||||
|
||||
@@ -1680,16 +1914,18 @@ class Database:
|
||||
appratat (int): the ID of the new apparat
|
||||
"""
|
||||
# get book data
|
||||
query = f"UPDATE media SET app_id = '{appratat}' WHERE id = '{book_id}'"
|
||||
query = "UPDATE media SET app_id = ? WHERE id = ?"
|
||||
connection = self.connect()
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(query)
|
||||
cursor.execute(query, (appratat, book_id))
|
||||
connection.commit()
|
||||
connection.close()
|
||||
|
||||
def getApparatNameByAppNr(self, appnr: int):
|
||||
query = f"SELECT name FROM semesterapparat WHERE appnr = '{appnr}' and deletion_status = 0"
|
||||
data = self.query_db(query)
|
||||
query = (
|
||||
"SELECT name FROM semesterapparat WHERE appnr = ? and deletion_status = 0"
|
||||
)
|
||||
data = self.query_db(query, (appnr,))
|
||||
if data:
|
||||
return data[0][0]
|
||||
else:
|
||||
@@ -1701,4 +1937,72 @@ class Database:
|
||||
cursor.execute(query, args)
|
||||
result = cursor.fetchone()
|
||||
connection.close()
|
||||
return result
|
||||
return result
|
||||
|
||||
def getBookIdByPPN(self, ppn: str) -> int:
    """Find the id of the first media row whose bookdata contains ``ppn``.

    The match is a plain ``LIKE '%<ppn>%'`` on the serialized bookdata.

    Args:
        ppn: Text to look for inside the stored bookdata.

    Returns:
        The id of the first matching row, or ``None`` when nothing matches
        (despite the ``int`` annotation).
    """
    rows = self.query_db("SELECT id FROM media WHERE bookdata LIKE ?", (f"%{ppn}%",))
    if not rows:
        return None
    return rows[0][0]
|
||||
|
||||
def getNewEditionsByApparat(self, apparat_id: int) -> list[BookData]:
    """Get the not-yet-ordered new editions recorded for an apparat.

    Args:
        apparat_id (int): the id of the apparat

    Returns:
        list[BookData]: the second column of every matching ``neweditions``
        row, parsed via ``BookData().from_string``. Only the new edition is
        returned, not the old one.
    """
    rows = self.query_db(
        "SELECT * FROM neweditions WHERE for_apparat=? AND ordered=0", (apparat_id,)
    )
    return [BookData().from_string(row[1]) for row in rows]
|
||||
|
||||
def setOrdered(self, newBook_id: int):
    """Mark a ``neweditions`` row as ordered.

    Args:
        newBook_id: Id of the ``neweditions`` row to update.
    """
    self.query_db("UPDATE neweditions SET ordered=1 WHERE id=?", (newBook_id,))
|
||||
|
||||
def getBooksWithNewEditions(self, app_id) -> List[BookData]:
    """Pair each book of an apparat with the new edition recorded for it.

    Joins ``media`` with ``neweditions`` on ``old_edition_id`` and keeps
    the rows whose ``for_apparat`` equals ``app_id``.

    Args:
        app_id: Id of the apparat.

    Returns:
        A list of ``(old_edition, new_edition)`` tuples, both parsed via
        ``BookData().from_string``. Note the annotation says
        ``List[BookData]`` although tuples are returned.
    """
    query = "SELECT m.bookdata, new_bookdata FROM media m JOIN neweditions n ON m.id = n.old_edition_id WHERE n.for_apparat = ?"
    rows = self.query_db(query, (app_id,))
    return [
        (BookData().from_string(row[0]), BookData().from_string(row[1]))
        for row in rows
    ]
|
||||
|
||||
def getNewEditionId(self, newBook: BookData):
    """Find the ``neweditions`` row that stores the given book.

    The row is located with a ``LIKE`` search for the book's first ISBN in
    the serialized bookdata; the PPN is used when the book has no ISBN.

    Args:
        newBook: Book whose ``isbn`` / ``ppn`` is searched for.

    Returns:
        The id of the matching row, or ``None`` when there is none.
    """
    if newBook.isbn and len(newBook.isbn) > 0:
        needle = newBook.isbn[0]
    else:
        needle = newBook.ppn

    row = self.query_db(
        "SELECT id FROM neweditions WHERE new_bookdata LIKE ?",
        (f"%{needle}%",),
        one=True,
    )
    if not row:
        return None
    return row[0]
|
||||
|
||||
def insertNewEdition(self, newBook: BookData, oldBookId: int, for_apparat: int):
    """Store a new edition unless one with the same PPN is already recorded.

    Args:
        newBook (BookData): the new edition to store
        oldBookId (int): written to ``neweditions.old_edition_id``
        for_apparat (int): written to ``neweditions.for_apparat``
    """
    # Duplicate check: substring match of the PPN against the stored payload.
    existing = self.query_db(
        "SELECT id FROM neweditions WHERE new_bookdata LIKE ?",
        (f"%{newBook.ppn}%",),
        one=True,
    )
    if existing:
        log.info("New edition already in table, skipping insert")
        return

    # NOTE(review): to_dict is passed without being called - presumably a
    # property that yields the serialized payload; confirm against BookData.
    self.query_db(
        "INSERT INTO neweditions (new_bookdata, old_edition_id, for_apparat) VALUES (?,?,?)",
        (newBook.to_dict, oldBookId, for_apparat),
    )
|
||||
|
||||
@@ -101,3 +101,12 @@ CREATE_ELSA_MEDIA_TABLE = """CREATE TABLE elsa_media (
|
||||
elsa_id INTEGER NOT NULL,
|
||||
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
|
||||
)"""
|
||||
CREATE_TABLE_NEWEDITIONS = """CREATE TABLE neweditions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
new_bookdata TEXT,
|
||||
old_edition_id INTEGER,
|
||||
for_apparat INTEGER,
|
||||
ordered BOOLEAN DEFAULT (0),
|
||||
FOREIGN KEY (old_edition_id) REFERENCES media (id),
|
||||
FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
|
||||
)"""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from src import settings
|
||||
|
||||
database = settings.database
|
||||
|
||||
68
src/backend/migration_runner.py
Normal file
68
src/backend/migration_runner.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import os
|
||||
import sqlite3 as sql
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from src import DATABASE_DIR, settings
|
||||
from src.shared.logging import log
|
||||
|
||||
MIGRATIONS_DIR = Path(__file__).parent / "migrations"
|
||||
|
||||
|
||||
def _ensure_migrations_table(conn: sql.Connection) -> None:
    """Create the ``schema_migrations`` bookkeeping table if it is missing.

    Args:
        conn: open SQLite connection; the change is committed on it.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS schema_migrations (
            id TEXT PRIMARY KEY,
            applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
        """
    cur = conn.cursor()
    cur.execute(ddl)
    conn.commit()
|
||||
|
||||
|
||||
def _applied_migrations(conn: sql.Connection) -> List[str]:
    """Return the ids stored in ``schema_migrations``, ordered by id.

    Args:
        conn: open SQLite connection on which the table already exists.
    """
    cur = conn.cursor()
    cur.execute("SELECT id FROM schema_migrations ORDER BY id")
    return [row[0] for row in cur.fetchall()]
|
||||
|
||||
|
||||
def _apply_sql_file(conn: sql.Connection, path: Path) -> None:
    """Execute one migration script and record it as applied.

    Args:
        conn: open SQLite connection the script is run on.
        path: UTF-8 encoded SQL file; its file name becomes the id stored in
            ``schema_migrations``.
    """
    log.info(f"Applying migration {path.name}")
    script = path.read_text(encoding="utf-8")
    cur = conn.cursor()
    cur.executescript(script)
    # Record the script by file name once it has run.
    record_sql = "INSERT OR REPLACE INTO schema_migrations (id) VALUES (?)"
    cur.execute(record_sql, (path.name,))
    conn.commit()
|
||||
|
||||
|
||||
def run_migrations(db_path: Path) -> None:
    """Run all unapplied migrations from the migrations directory against the database at db_path.

    Every ``*.sql`` file in ``MIGRATIONS_DIR`` whose file name is not yet
    listed in ``schema_migrations`` is applied, in sorted file-name order.
    Nothing happens when the migrations directory does not exist.

    Args:
        db_path: location of the SQLite database file to migrate.
    """
    if not MIGRATIONS_DIR.exists():
        log.debug("Migrations directory does not exist, skipping migrations")
        return

    # Ensure the configured database directory exists. Wrapping in Path keeps
    # this working when the setting is a plain string.
    db_dir = Path(settings.database.path or DATABASE_DIR)
    if not db_dir.exists():
        os.makedirs(db_dir, exist_ok=True)
    # Also ensure the directory that will actually hold db_path exists;
    # sqlite cannot create the file when its parent directory is missing.
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    conn = sql.connect(db_path)
    try:
        _ensure_migrations_table(conn)
        applied = set(_applied_migrations(conn))

        # Sorted order decides the order in which scripts are applied.
        migration_files = sorted(
            p for p in MIGRATIONS_DIR.iterdir() if p.suffix == ".sql"
        )
        for m in migration_files:
            if m.name in applied:
                log.debug(f"Skipping already applied migration {m.name}")
                continue
            _apply_sql_file(conn, m)
    finally:
        conn.close()
|
||||
132
src/backend/migrations/V001__create_base_tables.sql
Normal file
132
src/backend/migrations/V001__create_base_tables.sql
Normal file
@@ -0,0 +1,132 @@
|
||||
BEGIN TRANSACTION;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS semesterapparat (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
name TEXT,
|
||||
prof_id INTEGER,
|
||||
fach TEXT,
|
||||
appnr INTEGER,
|
||||
erstellsemester TEXT,
|
||||
verlängert_am TEXT,
|
||||
dauer BOOLEAN,
|
||||
verlängerung_bis TEXT,
|
||||
deletion_status INTEGER,
|
||||
deleted_date TEXT,
|
||||
apparat_id_adis INTEGER,
|
||||
prof_id_adis INTEGER,
|
||||
konto INTEGER,
|
||||
FOREIGN KEY (prof_id) REFERENCES prof (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS media (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
bookdata TEXT,
|
||||
app_id INTEGER,
|
||||
prof_id INTEGER,
|
||||
deleted INTEGER DEFAULT (0),
|
||||
available BOOLEAN,
|
||||
reservation BOOLEAN,
|
||||
FOREIGN KEY (prof_id) REFERENCES prof (id),
|
||||
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
id INTEGER PRIMARY KEY,
|
||||
filename TEXT,
|
||||
fileblob BLOB,
|
||||
app_id INTEGER,
|
||||
filetyp TEXT,
|
||||
prof_id INTEGER REFERENCES prof (id),
|
||||
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS messages (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
message TEXT NOT NULL,
|
||||
remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
user_id INTEGER NOT NULL,
|
||||
appnr INTEGER,
|
||||
FOREIGN KEY (user_id) REFERENCES user (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS prof (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
titel TEXT,
|
||||
fname TEXT,
|
||||
lname TEXT,
|
||||
fullname TEXT NOT NULL UNIQUE,
|
||||
mail TEXT,
|
||||
telnr TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
username TEXT NOT NULL UNIQUE,
|
||||
password TEXT NOT NULL,
|
||||
salt TEXT NOT NULL,
|
||||
role TEXT NOT NULL,
|
||||
email TEXT UNIQUE,
|
||||
name TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS subjects (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
name TEXT NOT NULL UNIQUE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS elsa (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
date TEXT NOT NULL,
|
||||
semester TEXT NOT NULL,
|
||||
prof_id INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS elsa_files (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
filename TEXT NOT NULL,
|
||||
fileblob BLOB NOT NULL,
|
||||
elsa_id INTEGER NOT NULL,
|
||||
filetyp TEXT NOT NULL,
|
||||
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS elsa_media (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
work_author TEXT,
|
||||
section_author TEXT,
|
||||
year TEXT,
|
||||
edition TEXT,
|
||||
work_title TEXT,
|
||||
chapter_title TEXT,
|
||||
location TEXT,
|
||||
publisher TEXT,
|
||||
signature TEXT,
|
||||
issue TEXT,
|
||||
pages TEXT,
|
||||
isbn TEXT,
|
||||
type TEXT,
|
||||
elsa_id INTEGER NOT NULL,
|
||||
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS neweditions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
new_bookdata TEXT,
|
||||
old_edition_id INTEGER,
|
||||
for_apparat INTEGER,
|
||||
ordered BOOLEAN DEFAULT (0),
|
||||
FOREIGN KEY (old_edition_id) REFERENCES media (id),
|
||||
FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
|
||||
);
|
||||
|
||||
-- Helpful indices to speed up frequent lookups and joins
|
||||
CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted);
|
||||
CREATE INDEX IF NOT EXISTS idx_media_available ON media(available);
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr);
|
||||
|
||||
COMMIT;
|
||||
10
src/backend/migrations/V002__create_table_webadis_login.sql
Normal file
10
src/backend/migrations/V002__create_table_webadis_login.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
BEGIN TRANSACTION;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS webadis_login (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
username TEXT NOT NULL,
|
||||
password TEXT NOT NULL
|
||||
);
|
||||
|
||||
COMMIT;
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
BEGIN TRANSACTION;
|
||||
|
||||
ALTER TABLE webadis_login
|
||||
ADD COLUMN effective_range TEXT;
|
||||
|
||||
COMMIT;
|
||||
@@ -1,19 +1,10 @@
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
from src.backend import Database
|
||||
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
import loguru
|
||||
import sys
|
||||
from src import LOG_DIR
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||
log.add(sys.stdout, level="INFO")
|
||||
# Logger configured centrally in main; this module just uses `log`
|
||||
|
||||
|
||||
class BookGrabber(QThread):
|
||||
@@ -31,9 +22,10 @@ class BookGrabber(QThread):
|
||||
self.book_id = None
|
||||
self.use_any = False
|
||||
self.use_exact = False
|
||||
self.app_id = None
|
||||
self.app_nr = None
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.request = WebRequest()
|
||||
self.db = Database()
|
||||
|
||||
def add_values(
|
||||
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
@@ -45,13 +37,15 @@ class BookGrabber(QThread):
|
||||
self.use_any = any_book
|
||||
self.use_exact = exact
|
||||
log.info(f"Working on {len(self.data)} entries")
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
log.debug("State: " + str(self.tstate))
|
||||
self.request.set_apparat(self.app_id)
|
||||
app_nr = self.db.query_db(
|
||||
"SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,)
|
||||
)[0][0]
|
||||
self.request.set_apparat(app_nr)
|
||||
# log.debug(self.tstate)
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
item = 0
|
||||
iterdata = self.data
|
||||
# log.debug(iterdata)
|
||||
@@ -91,7 +85,7 @@ class BookGrabber(QThread):
|
||||
state = 0
|
||||
for result in transformer.RDS_DATA:
|
||||
# log.debug(result.RDS_LOCATION)
|
||||
if str(self.app_id) in result.RDS_LOCATION:
|
||||
if str(self.app_nr) in result.RDS_LOCATION:
|
||||
state = 1
|
||||
break
|
||||
|
||||
@@ -126,27 +120,27 @@ class BookGrabberTest(QThread):
|
||||
self.is_Running = True
|
||||
log.info("Starting worker thread")
|
||||
self.data = None
|
||||
self.app_id = None
|
||||
self.app_nr = None
|
||||
self.prof_id = None
|
||||
self.mode = None
|
||||
self.book_id = None
|
||||
self.use_any = False
|
||||
self.use_exact = False
|
||||
self.app_id = appnr
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.app_nr = appnr
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
self.results = []
|
||||
|
||||
def add_values(
|
||||
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
):
|
||||
self.app_id = app_id
|
||||
self.app_nr = app_nr
|
||||
self.prof_id = prof_id
|
||||
self.mode = mode
|
||||
self.data = data
|
||||
self.use_any = any_book
|
||||
self.use_exact = exact
|
||||
log.info(f"Working on {len(self.data)} entries")
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
log.debug("State: " + str(self.tstate))
|
||||
# log.debug(self.tstate)
|
||||
|
||||
@@ -159,7 +153,7 @@ class BookGrabberTest(QThread):
|
||||
signature = str(entry)
|
||||
log.info("Processing entry: " + signature)
|
||||
|
||||
webdata = WebRequest().set_apparat(self.app_id).get_ppn(entry)
|
||||
webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry)
|
||||
if self.use_any:
|
||||
webdata = webdata.use_any_book
|
||||
webdata = webdata.get_data()
|
||||
@@ -186,7 +180,7 @@ class BookGrabberTest(QThread):
|
||||
state = 0
|
||||
for result in transformer.RDS_DATA:
|
||||
# log.debug(result.RDS_LOCATION)
|
||||
if str(self.app_id) in result.RDS_LOCATION:
|
||||
if str(self.app_nr) in result.RDS_LOCATION:
|
||||
state = 1
|
||||
break
|
||||
|
||||
|
||||
345
src/backend/thread_neweditions.py
Normal file
345
src/backend/thread_neweditions.py
Normal file
@@ -0,0 +1,345 @@
|
||||
import os
|
||||
import re
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from math import ceil
|
||||
from queue import Empty, Queue
|
||||
from time import monotonic # <-- NEW
|
||||
from typing import List, Optional
|
||||
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
# from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
from src.backend.catalogue import Catalogue
|
||||
from src.logic import BookData
|
||||
from src.logic.SRU import SWB
|
||||
from src.shared.logging import log
|
||||
|
||||
# Use all available cores minus two, but at least one worker.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to 1 instead of failing on `None - 2` at import time.
THREAD_COUNT = max((os.cpu_count() or 1) - 2, 1)
# Used by the checker thread to derive the worker count: ceil(total / THREAD_MIN_ITEMS).
THREAD_MIN_ITEMS = 5
|
||||
|
||||
# Logger configured centrally in main; use shared `log`
|
||||
|
||||
swb = SWB()
|
||||
dnb = SWB()
|
||||
cat = Catalogue()
|
||||
|
||||
RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative RVK character set
|
||||
|
||||
|
||||
def find_newer_edition(
    swb_result: "BookData", dnb_result: List["BookData"]
) -> Optional[List["BookData"]]:
    """Select the candidates that are newer editions of ``swb_result``.

    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    BUT: discard any candidate with year < swb.year (if both years are known).

    Same-work check:
      - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
      - If both have signatures and RVKs differ -> skip.

    Preferences (in order):
      1) RVK matches SWB
      2) Print over Online-Ressource
      3) Has signature
      4) Newer: (year desc, edition_number desc)

    Args:
        swb_result: the reference record the candidates are compared against.
        dnb_result: candidate records to filter.

    Returns:
        All qualifying candidates, de-duplicated by PPN and ordered by the
        preferences above, or None when no candidate qualifies.
    """

    def strip_copy_and_edition(s: str) -> str:
        s = re.sub(r"\(\s*\d+\s*\)", "", s)  # remove '(N)'
        s = re.sub(r"\s*\+\s*\d+\s*$", "", s)  # remove trailing '+N'
        return s

    def extract_rvk_root(sig: Optional[str]) -> str:
        if not sig:
            return ""
        t = strip_copy_and_edition(sig.upper())
        t = re.sub(r"\s+", " ", t).strip()
        m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t)
        if not m:
            # RVK_ALLOWED is itself a bracketed character class; unwrap it so
            # it can be negated here. Interpolating it verbatim would nest the
            # brackets and produce a pattern that does not strip anything.
            allowed = (
                RVK_ALLOWED[1:-1]
                if RVK_ALLOWED.startswith("[") and RVK_ALLOWED.endswith("]")
                else RVK_ALLOWED
            )
            cleaned = re.sub(rf"[^{allowed} ]+", "", t).strip()
            return cleaned.split(" ")[0] if cleaned else ""
        return re.sub(r"\s+", " ", m.group(1)).strip()

    def has_sig(b: "BookData") -> bool:
        return bool(getattr(b, "signature", None))

    def is_online(b: "BookData") -> bool:
        return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource"

    def is_print(b: "BookData") -> bool:
        return not is_online(b)

    swb_has_sig = has_sig(swb_result)
    swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None))

    def rvk_matches_swb(b: "BookData") -> bool:
        if not has_sig(b) or not swb_has_sig:
            return False
        return extract_rvk_root(b.signature) == swb_rvk

    def strictly_newer(b: "BookData") -> bool:
        both_years_known = b.year is not None and swb_result.year is not None
        # Hard guard: if both years are known and candidate is older, discard
        if both_years_known and b.year < swb_result.year:
            return False
        newer_by_year = both_years_known and b.year > swb_result.year
        newer_by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        # Thanks to the guard above, newer_by_edition can't pick something
        # with a smaller year.
        return newer_by_year or newer_by_edition

    # 1) Filter: same work (by RVK if both have sigs) AND strictly newer
    candidates: List["BookData"] = []
    for b in dnb_result:
        if has_sig(b) and swb_has_sig and extract_rvk_root(b.signature) != swb_rvk:
            continue  # different work
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN -> prefer (rvk-match, is-print, has-signature)
    def pref_score(x: "BookData") -> tuple[int, int, int]:
        return (
            1 if rvk_matches_swb(x) else 0,
            1 if is_print(x) else 0,
            1 if has_sig(x) else 0,
        )

    best_by_key: dict[object, "BookData"] = {}
    for idx, b in enumerate(candidates):
        ppn = getattr(b, "ppn", None)
        # Records without a PPN cannot be identified as duplicates of each
        # other, so each one gets its own key instead of sharing `None`.
        key = ppn if ppn else ("no-ppn", idx)
        prev = best_by_key.get(key)
        if prev is None or pref_score(b) > pref_score(prev):
            best_by_key[key] = b

    deduped = list(best_by_key.values())

    # 3) Preserve all qualifying newer editions, but order by preference
    def sort_key(b: "BookData"):
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        return (*pref_score(b), year, ed)

    deduped.sort(key=sort_key, reverse=True)
    return deduped
|
||||
|
||||
|
||||
class NewEditionCheckerThread(QThread):
    """Thread that checks a list of books for newer editions.

    The entries are split into chunks, each chunk is handled by one pool
    worker, and progress, throughput and the collected results are reported
    through the signals below.
    """

    updateSignal = Signal(int, int)  # (processed, total)
    updateProgress = Signal(int, int)  # (processed, total)
    total_entries_signal = Signal(int)
    resultsSignal = Signal(list)  # list[tuple[BookData, list[BookData]]]

    # Metrics signals
    rateSignal = Signal(float)  # items per second ("it/s")
    etaSignal = Signal(int)  # seconds remaining (-1 when unknown)

    def __init__(self, entries: Optional[list["BookData"]] = None, parent=None):
        super().__init__(parent)
        self.entries: list["BookData"] = entries if entries is not None else []
        self.results: list[tuple["BookData", list["BookData"]]] = []

    def reset(self):
        """Drop the queued entries and any collected results."""
        self.entries = []
        self.results = []

    # ---------- internal helpers ----------

    @staticmethod
    def _split_evenly(items: list, parts: int) -> list[list]:
        """Split items as evenly as possible into `parts` chunks (no empty tails)."""
        if parts <= 1 or len(items) <= 1:
            return [items]
        n = len(items)
        base = n // parts
        extra = n % parts
        chunks = []
        i = 0
        for k in range(parts):
            # The first `extra` chunks take one additional item.
            size = base + (1 if k < extra else 0)
            if size == 0:
                continue
            chunks.append(items[i : i + size])
            i += size
        return chunks

    @staticmethod
    def _clean_title(raw: str) -> str:
        """Strip trailing punctuation and a parenthesised part from a title."""
        title = raw.rstrip(" .:,;!?")
        title = re.sub(r"\s*\(.*\)", "", title)
        return title.strip()

    @classmethod
    def _process_book(
        cls, book: "BookData"
    ) -> tuple["BookData", list["BookData"]] | None:
        """Process one book; returns (original, [found editions]) or None.

        None is returned when the book has no title, when the reference
        record cannot be found by PPN, or when no newer edition qualifies.
        """
        if not book.title:
            return None

        # Reference record for this book, looked up by its PPN.
        held = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])
        if not held:
            # Nothing to compare against; previously this surfaced as an
            # IndexError that the worker swallowed.
            return None
        swb_result = held[0]

        # Candidate records with the same title and publisher.
        candidates = swb.getBooks(
            [
                f"pica.tit={book.title}",
                f"pica.vlg={book.publisher}",
            ]
        )
        new_editions = find_newer_edition(swb_result, candidates)
        if not new_editions:
            return None

        for new_edition in new_editions:
            new_edition.library_location = cat.get_location(new_edition.ppn)
            isbn = new_edition.isbn
            if isinstance(isbn, list):
                isbn = isbn[0] if isbn else None
            # Only build the shop link when an ISBN is actually known;
            # otherwise the query would search for the text "None".
            if isbn:
                new_edition.link = (
                    f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}"
                )
            new_edition.in_library = cat.in_library(new_edition.ppn)

        return (book, new_editions)

    @classmethod
    def _worker(cls, items: list["BookData"], q: Queue) -> None:
        """Worker for one chunk; pushes ('result', ...), ('progress', 1), and ('done', None)."""
        try:
            for book in items:
                try:
                    result = cls._process_book(book)
                except Exception:
                    # Best effort: one failing lookup must not abort the whole
                    # chunk, but it should leave a trace in the log.
                    log.exception(
                        f"New edition check failed for PPN {getattr(book, 'ppn', None)}"
                    )
                    result = None
                if result is not None:
                    q.put(("result", result))
                q.put(("progress", 1))
        finally:
            # Always signal completion so run() can stop waiting.
            q.put(("done", None))

    # ---------- thread entry point ----------

    def run(self):
        """Process all entries in parallel and emit progress, metrics and results."""
        total = len(self.entries)
        self.total_entries_signal.emit(total)

        # start timer for metrics
        t0 = monotonic()

        if total == 0:
            log.debug("No entries to process.")
            # emit metrics (zero work)
            self.rateSignal.emit(0.0)
            self.etaSignal.emit(0)
            self.resultsSignal.emit([])
            return

        # At most THREAD_COUNT workers, roughly one per THREAD_MIN_ITEMS entries
        num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
        chunks = self._split_evenly(self.entries, num_workers)
        sizes = [len(ch) for ch in chunks]

        q: Queue = Queue()
        processed = 0
        finished_workers = 0

        with ThreadPoolExecutor(max_workers=len(chunks)) as ex:
            futures = [ex.submit(self._worker, ch, q) for ch in chunks]

            log.info(
                f"Launched {len(futures)} worker thread(s) for {total} entries: {sizes} entries per thread."
            )
            for idx, sz in enumerate(sizes, 1):
                log.debug(f"Thread {idx}: {sz} entries")

            # Aggregate progress/results until every worker has reported 'done'
            while finished_workers < len(chunks):
                try:
                    kind, payload = q.get(timeout=0.1)
                except Empty:
                    continue

                if kind == "progress":
                    processed += int(payload)
                    self.updateSignal.emit(processed, total)
                    self.updateProgress.emit(processed, total)

                    # Throughput so far and the remaining time derived from it
                    elapsed = max(1e-9, monotonic() - t0)
                    rate = processed / elapsed  # items per second
                    remaining = max(0, total - processed)
                    eta_sec = int(round(remaining / rate)) if rate > 0 else -1

                    self.rateSignal.emit(rate)
                    self.etaSignal.emit(eta_sec)
                elif kind == "result":
                    self.results.append(payload)
                elif kind == "done":
                    finished_workers += 1

        # Final metrics on completion
        elapsed_total = max(1e-9, monotonic() - t0)
        final_rate = total / elapsed_total
        self.rateSignal.emit(final_rate)
        self.etaSignal.emit(0)

        self.resultsSignal.emit(self.results)
|
||||
@@ -1,13 +1,15 @@
|
||||
import sys
|
||||
import time
|
||||
|
||||
import loguru
|
||||
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src.backend import Database
|
||||
import loguru
|
||||
import sys
|
||||
from src import LOG_DIR
|
||||
from src.backend import Database
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
@@ -29,8 +31,8 @@ class AutoAdder(QThread):
|
||||
self.app_id = app_id
|
||||
self.prof_id = prof_id
|
||||
|
||||
# print("Launched AutoAdder")
|
||||
# print(self.data, self.app_id, self.prof_id)
|
||||
# #print("Launched AutoAdder")
|
||||
# #print(self.data, self.app_id, self.prof_id)
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
@@ -46,7 +48,7 @@ class AutoAdder(QThread):
|
||||
time.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
# print(e)
|
||||
# #print(e)
|
||||
log.exception(
|
||||
f"The query failed with message {e} for signature {entry}"
|
||||
)
|
||||
|
||||
@@ -1,22 +1,11 @@
|
||||
import time
|
||||
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src.backend.database import Database
|
||||
from src import LOG_DIR
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
|
||||
# from src.transformers import RDS_AVAIL_DATA
|
||||
import loguru
|
||||
import sys
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
from src.backend.webadis import get_book_medianr
|
||||
from src.logic.webrequest import BibTextTransformer, TransformerType, WebRequest
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
class AvailChecker(QThread):
|
||||
@@ -24,7 +13,11 @@ class AvailChecker(QThread):
|
||||
updateProgress = Signal(int, int)
|
||||
|
||||
def __init__(
|
||||
self, links: list = None, appnumber: int = None, parent=None, books=list[dict]
|
||||
self,
|
||||
links: list[str] | None = None,
|
||||
appnumber: int | None = None,
|
||||
parent=None,
|
||||
books: list[dict] | None = None,
|
||||
):
|
||||
if links is None:
|
||||
links = []
|
||||
@@ -39,11 +32,13 @@ class AvailChecker(QThread):
|
||||
)
|
||||
self.links = links
|
||||
self.appnumber = appnumber
|
||||
self.books = books
|
||||
self.books = books or []
|
||||
log.info(
|
||||
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
|
||||
)
|
||||
time.sleep(2)
|
||||
# Pre-create reusable request and transformer to avoid per-item overhead
|
||||
self._request = WebRequest().set_apparat(self.appnumber)
|
||||
self._rds_transformer = BibTextTransformer(TransformerType.RDS)
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
@@ -51,16 +46,18 @@ class AvailChecker(QThread):
|
||||
count = 0
|
||||
for link in self.links:
|
||||
log.info("Processing entry: " + str(link))
|
||||
data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
|
||||
transformer = BibTextTransformer("RDS")
|
||||
rds = transformer.get_data(data).return_data("rds_availability")
|
||||
data = self._request.get_ppn(link).get_data()
|
||||
rds = self._rds_transformer.get_data(data).return_data("rds_availability")
|
||||
|
||||
book_id = None
|
||||
if not rds or not rds.items:
|
||||
log.warning(f"No RDS data found for link {link}")
|
||||
continue
|
||||
for item in rds.items:
|
||||
sign = item.superlocation
|
||||
loc = item.location
|
||||
# # print(item.location)
|
||||
if self.appnumber in sign or self.appnumber in loc:
|
||||
# # #print(item.location)
|
||||
if str(self.appnumber) in sign or str(self.appnumber) in loc:
|
||||
state = 1
|
||||
break
|
||||
for book in self.books:
|
||||
@@ -68,7 +65,13 @@ class AvailChecker(QThread):
|
||||
book_id = book["id"]
|
||||
break
|
||||
log.info(f"State of {link}: " + str(state))
|
||||
# print("Updating availability of " + str(book_id) + " to " + str(state))
|
||||
# #print("Updating availability of " + str(book_id) + " to " + str(state))
|
||||
# use get_book_medianr to update the medianr of the book in the database
|
||||
auth = self.db.getWebADISAuth
|
||||
medianr = get_book_medianr(rds.items[0].callnumber, self.appnumber, auth)
|
||||
book_data = book["bookdata"]
|
||||
book_data.medianr = medianr
|
||||
self.db.updateBookdata(book["id"], book_data)
|
||||
self.db.setAvailability(book_id, state)
|
||||
count += 1
|
||||
self.updateProgress.emit(count, len(self.links))
|
||||
|
||||
35
src/backend/webadis.py
Normal file
35
src/backend/webadis.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
|
||||
def get_book_medianr(
    signature: str, semesterapparat_nr: int, auth: tuple
) -> str | None:
    """Look up a value for a signature in aDIS Web, for one Semesterapparat.

    Logs in with a headless Chromium browser, searches the catalogue for the
    signature, and scans the result lines that start with "* " for the one
    containing ``Semesterapparat: <semesterapparat_nr>``.

    Args:
        signature: text typed into the "Signatur" search box.
        semesterapparat_nr: number that must appear in the matching line.
        auth: ``auth[0]`` fills the "Benutzer" box and ``auth[1]`` the
            "Kennwort" box.

    Returns:
        The text between "* " and the first ":" of the matching line, or
        None when no line matches.
        NOTE(review): presumably this is the media number - confirm against
        the page layout.
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        try:
            page = context.new_page()
            page.goto(
                "https://bsz.ibs-bw.de:22998/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SDAP42"
            )
            # Log in
            page.get_by_role("textbox", name="Benutzer").fill(auth[0])
            page.get_by_role("textbox", name="Benutzer").press("Tab")
            page.get_by_role("textbox", name="Kennwort").fill(auth[1])
            page.get_by_role("textbox", name="Kennwort").press("Enter")
            # Search the catalogue by signature
            page.get_by_role("button", name="Katalog").click()
            page.get_by_role("textbox", name="Signatur").click()
            page.get_by_role("textbox", name="Signatur").fill(signature)
            page.get_by_role("textbox", name="Signatur").press("Enter")
            book_list = page.locator("iframe").content_frame.get_by_role(
                "cell", name="Bibliothek der Pädagogischen"
            )
            # This will always find one result; the individual entries are
            # the lines of its text that start with "* ".
            marker = f"Semesterapparat: {semesterapparat_nr}"
            for entry in book_list.inner_text().split("\n"):
                if entry.startswith("* ") and marker in entry:
                    return entry.split("* ")[1].split(":")[0]
            return None
        finally:
            # Runs on the early return as well, so the browser is always closed.
            context.close()
            browser.close()
|
||||
16
src/background/__init__.py
Normal file
16
src/background/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Background tasks and threading operations."""
|
||||
|
||||
from .autoadder import AutoAdder
|
||||
from .availability_checker import AvailChecker
|
||||
from .book_grabber import BookGrabber, BookGrabberTest
|
||||
from .new_editions import NewEditionCheckerThread
|
||||
from .documentation_server import DocumentationThread
|
||||
|
||||
__all__ = [
|
||||
"AutoAdder",
|
||||
"AvailChecker",
|
||||
"BookGrabber",
|
||||
"BookGrabberTest",
|
||||
"NewEditionCheckerThread",
|
||||
"DocumentationThread",
|
||||
]
|
||||
59
src/background/autoadder.py
Normal file
59
src/background/autoadder.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import sys
|
||||
import time
|
||||
|
||||
import loguru
|
||||
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.database import Database
|
||||
|
||||
# Module-local logging setup: writes INFO and above to stdout and everything
# to a rotating file under LOG_DIR.
# NOTE(review): `loguru.logger` is shared by every importer and `remove()`
# without arguments drops all previously registered sinks. Sibling modules
# import `log` from `src.shared.logging` and state that logging is configured
# centrally -- confirm this setup does not undo that configuration.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
|
||||
# from src.transformers import RDS_AVAIL_DATA
|
||||
|
||||
|
||||
class AutoAdder(QThread):
    """Worker thread that walks over ``data`` and reports progress via signals.

    For every entry the thread emits ``updateSignal`` (index before the entry
    is handled), ``setTextSignal`` (the entry itself) and ``progress`` (number
    of entries handled so far), then sleeps for one second.
    """

    # Emitted with the zero-based index of the entry about to be handled.
    updateSignal = Signal(int)

    # Emitted with the entry itself.
    # NOTE(review): declared as ``int`` although the error log below calls the
    # entry a "signature" -- confirm the payload type.
    setTextSignal = Signal(int)
    # Emitted with the number of entries handled so far.
    progress = Signal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        """Store the work list and the ids it belongs to.

        Args:
            data: Iterable of entries to process; ``None`` means "nothing to do".
            app_id: Stored on the instance; not read by ``run``.
            prof_id: Stored on the instance; not read by ``run``.
            parent: Optional Qt parent object.
        """
        super().__init__(parent)
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

    def run(self):
        """Emit the progress signals for every entry, one entry per second."""
        self.db = Database()
        log.info("Starting worker thread")
        # ``data`` defaults to None; treat that as an empty work list instead
        # of failing with a TypeError when iterating / calling len().
        entries = self.data if self.data is not None else []
        item = 0
        for entry in entries:
            try:
                self.updateSignal.emit(item)
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                time.sleep(1)
            except Exception as e:
                # Best effort: log the failure and carry on with the next entry.
                log.exception(
                    f"The query failed with message {e} for signature {entry}"
                )
                continue
        if item == len(entries):
            log.info("Worker thread finished")
            # NOTE(review): QThread emits ``finished`` on its own once run()
            # returns, so connected slots may fire twice -- confirm this
            # explicit emit is still wanted.
            self.finished.emit()
|
||||
83
src/background/availability_checker.py
Normal file
83
src/background/availability_checker.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src.database import Database
|
||||
from src.services.webadis import get_book_medianr
|
||||
from src.services.webrequest import BibTextTransformer, TransformerType, WebRequest
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
class AvailChecker(QThread):
    """Worker thread that checks, per link, whether a title sits in an apparat.

    For each link the RDS availability data is fetched; the title counts as
    available (state 1) when the apparat number occurs in the ``superlocation``
    or ``location`` of any returned item. The matching stored book (looked up
    by signature) gets its media number and availability updated.
    """

    # Emitted per processed link with (callnumber of the inspected item, state).
    updateSignal = Signal(str, int)
    # Emitted per processed link with (processed count, total number of links).
    updateProgress = Signal(int, int)

    def __init__(
        self,
        links: list[str] | None = None,
        appnumber: int | None = None,
        parent=None,
        books: list[dict] | None = None,
    ):
        """Store the work list and pre-create the request helpers.

        Args:
            links: Entries to check; each is passed to ``WebRequest.get_ppn``
                and compared against the stored books' signatures.
            appnumber: Apparat number searched for in the item locations.
            parent: Optional Qt parent object.
            books: Dicts that provide at least the keys ``"id"`` and
                ``"bookdata"`` (an object with a ``signature`` attribute).
        """
        if links is None:
            links = []
        super().__init__(parent)
        log.info("Starting worker thread")
        log.info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books or []
        log.info(
            f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
        )
        # Pre-create reusable request and transformer to avoid per-item overhead
        self._request = WebRequest().set_apparat(self.appnumber)
        self._rds_transformer = BibTextTransformer(TransformerType.RDS)

    def run(self):
        """Check every link and persist the result for the matching book."""
        self.db = Database()
        total = len(self.links)
        needle = str(self.appnumber)
        count = 0
        for link in self.links:
            log.info("Processing entry: " + str(link))
            data = self._request.get_ppn(link).get_data()
            rds = self._rds_transformer.get_data(data).return_data("rds_availability")

            if not rds or not rds.items:
                # NOTE(review): skipped links do not advance the progress
                # counter, so it can end below ``total`` -- confirm intended.
                log.warning(f"No RDS data found for link {link}")
                continue

            # Reset per link; a hit for one link must not leak into the next.
            state = 0
            for item in rds.items:
                if needle in item.superlocation or needle in item.location:
                    state = 1
                    break

            log.info(f"State of {link}: " + str(state))

            # Only touch the database for the book that belongs to this link.
            book = next(
                (b for b in self.books if b["bookdata"].signature == link), None
            )
            if book is None:
                log.warning(
                    f"No stored book matches link {link}; skipping database update"
                )
            else:
                # NOTE(review): accessed without call parentheses; presumably a
                # property on Database -- confirm.
                auth = self.db.getWebADISAuth
                medianr = get_book_medianr(
                    rds.items[0].callnumber, self.appnumber, auth
                )
                book_data = book["bookdata"]
                book_data.medianr = medianr
                self.db.updateBookdata(book["id"], book_data)
                self.db.setAvailability(book["id"], state)

            count += 1
            self.updateProgress.emit(count, total)
            # ``item`` is the matching item, or the last one when none matched.
            self.updateSignal.emit(item.callnumber, state)

        log.info("Worker thread finished")
        self.quit()
|
||||
199
src/background/book_grabber.py
Normal file
199
src/background/book_grabber.py
Normal file
@@ -0,0 +1,199 @@
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
from src.database import Database
|
||||
from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
from src.shared.logging import log
|
||||
|
||||
# Logger configured centrally in main; this module just uses `log`
|
||||
|
||||
|
||||
class BookGrabber(QThread):
    """Worker thread that fetches book data per signature and stores it.

    Configure the job with :meth:`add_values`, then start the thread. Each
    entry is fetched through the shared ``WebRequest``, transformed, added to
    the database and finally marked as available (1) or not (0) depending on
    whether the apparat number occurs in one of the RDS locations.
    """

    # Emitted after every stored book with (stored count, total entries).
    updateSignal = Signal(int, int)
    # Declared for callers; not emitted by this class.
    done = Signal()

    def __init__(self):
        super(BookGrabber, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = []
        self.app_id = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = None
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
        self.request = WebRequest()
        self.db = Database()

    def add_values(
        self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        """Configure the job that ``run`` will process.

        Args:
            app_id: Database id of the apparat (``semesterapparat.id``).
            prof_id: Id passed through to ``Database.addBookToDatabase``.
            mode: Transformer mode handed to ``BibTextTransformer``.
            data: Entries (signatures) to fetch.
            any_book: When true, ``use_any_book`` is applied to each request.
            exact: When true and ``mode == "ARRAY"``, the transformer is
                restricted to the entry via ``use_signature``.
        """
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data: list[str] = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        # Resolve the apparat number and keep it on the instance: run() needs
        # it for the availability check (it used to stay None).
        self.app_nr = self.db.query_db(
            "SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,)
        )[0][0]
        self.request.set_apparat(self.app_nr)
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))

    def run(self):
        """Fetch, store and classify every configured entry."""
        item = 0
        for entry in self.data:
            if not self.is_Running:
                # stop() was requested; leave the remaining entries untouched.
                log.info("Stop requested; aborting before entry: {}", entry)
                break
            log.info("Processing entry: {}", entry)

            webdata = self.request.get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()

            if webdata == "error":
                continue

            bd = BibTextTransformer(self.mode)
            log.debug(webdata)
            if self.mode == "ARRAY" and self.use_exact:
                bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            log.debug(bd)
            if bd is None:
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )

            self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
            self.book_id = self.db.getLastBookId()
            log.info("Added book to database")

            # Available when the apparat number shows up in any RDS location.
            state = 0
            for result in transformer.RDS_DATA:
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break

            log.info(f"State of {entry}: {state}")
            log.debug(
                "updating availability of " + str(self.book_id) + " to " + str(state)
            )
            try:
                self.db.setAvailability(self.book_id, state)
                log.debug("Added book to database")
            except Exception as e:
                log.error(f"Failed to update availability: {e}")
                log.debug("Failed to update availability: " + str(e))

            item += 1
            self.updateSignal.emit(item, len(self.data))
        log.info("Worker thread finished")
        self.quit()

    def stop(self):
        """Ask ``run`` to stop before it starts the next entry."""
        self.is_Running = False
|
||||
|
||||
|
||||
class BookGrabberTest(QThread):
    """Dry-run variant of ``BookGrabber``: fetches book data, writes nothing.

    Transformed entries are collected in ``self.results`` instead of being
    stored in the database.
    """

    # Emitted after every collected book with (collected count, total entries).
    updateSignal = Signal(int, int)
    # Declared for callers; not emitted by this class.
    done = Signal()

    def __init__(self, appnr: int):
        """Create the worker for the apparat number ``appnr``."""
        super(BookGrabberTest, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = appnr
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        self.results = []

    def add_values(
        self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        """Configure the job that ``run`` will process.

        Args:
            app_nr: Apparat number used for the request and availability check.
            prof_id: Stored on the instance; not read by ``run``.
            mode: Transformer mode handed to ``BibTextTransformer``.
            data: Entries (signatures) to fetch.
            any_book: When true, ``use_any_book`` is applied to each request.
            exact: When true and ``mode == "ARRAY"``, the transformer is
                restricted to the entry via ``use_signature``.
        """
        self.app_nr = app_nr
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))

    def run(self):
        """Fetch and classify every configured entry, collecting the results."""
        item = 0
        # ``data`` stays None until add_values() is called; treat as no work.
        entries = self.data if self.data is not None else []
        for entry in entries:
            if not self.is_Running:
                # stop() was requested; leave the remaining entries untouched.
                log.info("Stop requested; aborting before entry: " + str(entry))
                break
            signature = str(entry)
            log.info("Processing entry: " + signature)

            webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()

            if webdata == "error":
                continue

            bd = BibTextTransformer(self.mode)
            if self.mode == "ARRAY" and self.use_exact:
                bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            if bd is None:
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )

            # Nothing is persisted in the test variant; say so in the log.
            log.info("Fetched book data (test run, nothing written to database)")

            # Available when the apparat number shows up in any RDS location.
            state = 0
            for result in transformer.RDS_DATA:
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break

            log.info(f"State of {signature}: {state}")
            self.results.append(bd)

            item += 1
            self.updateSignal.emit(item, len(entries))
        log.info("Worker thread finished")
        self.quit()

    def stop(self):
        """Ask ``run`` to stop before it starts the next entry."""
        self.is_Running = False
|
||||
23
src/background/documentation_server.py
Normal file
23
src/background/documentation_server.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from PySide6.QtCore import QThread, Slot
|
||||
from src.utils.documentation import website, QuietHandler
|
||||
from wsgiref.simple_server import make_server
|
||||
|
||||
|
||||
class DocumentationThread(QThread):
    """Serves the documentation WSGI app on ``localhost:8000`` in a thread."""

    # Seconds handle_request() may block before the loop re-checks whether an
    # interruption was requested.
    _POLL_INTERVAL = 0.5

    def __init__(self):
        super().__init__()
        self._server = None  # set while run() is serving

    def run(self):
        """Serve requests one at a time until an interruption is requested."""
        server = make_server(
            "localhost", 8000, website(), handler_class=QuietHandler
        )
        # Without a timeout handle_request() blocks until a request arrives,
        # so a stop request would never be noticed on an idle server.
        server.timeout = self._POLL_INTERVAL
        self._server = server
        try:
            while not self.isInterruptionRequested():
                server.handle_request()
        finally:
            # Release the listening socket so the port can be reused.
            server.server_close()
            self._server = None

    @Slot()  # slot you can connect to aboutToQuit
    def stop(self):
        """Ask the serving loop to exit; returns immediately.

        ``server.shutdown()`` is deliberately not used: it only cooperates
        with ``serve_forever()`` and blocks forever when that loop is not
        running, which would freeze the calling (GUI) thread.
        """
        self.requestInterruption()
|
||||
345
src/background/new_editions.py
Normal file
345
src/background/new_editions.py
Normal file
@@ -0,0 +1,345 @@
|
||||
import os
|
||||
import re
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from math import ceil
|
||||
from queue import Empty, Queue
|
||||
from time import monotonic # <-- NEW
|
||||
from typing import List, Optional
|
||||
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
# from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
from src.services.catalogue import Catalogue
|
||||
from src.core.models import BookData
|
||||
from src.services.sru import SWB
|
||||
from src.shared.logging import log
|
||||
|
||||
# Use all available cores minus two, but always at least one worker.
# os.cpu_count() returns None when the count cannot be determined; fall back
# to a single core instead of failing at import time.
THREAD_COUNT = max((os.cpu_count() or 1) - 2, 1)
# Target number of entries per worker: one worker is started for every
# THREAD_MIN_ITEMS entries (rounded up), capped at THREAD_COUNT.
THREAD_MIN_ITEMS = 5
|
||||
|
||||
# Logger configured centrally in main; use shared `log`
|
||||
|
||||
# Module-level clients shared by all worker threads.
# `swb` serves both lookups in NewEditionCheckerThread._process_book.
swb = SWB()
# NOTE(review): despite its name this is a second SWB client, and it is not
# referenced anywhere else in this module -- confirm whether it can go.
dnb = SWB()
# Used for the location / in-library lookups of found editions.
cat = Catalogue()

# Character class (regex source) accepted inside an RVK notation.
RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative RVK character set
|
||||
|
||||
|
||||
def find_newer_edition(
    swb_result: "BookData", dnb_result: "List[BookData]"
) -> "Optional[List[BookData]]":
    """Return the editions in ``dnb_result`` that are newer than ``swb_result``.

    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    BUT: discard any candidate with year < swb.year (if both years are known).

    Years and edition numbers are compared numerically; values given as
    strings are reduced to their digits first, values without digits count
    as unknown.

    Same-work check:
      - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
      - If both have signatures and RVKs differ -> skip.

    Preferences (in order):
      1) RVK matches SWB
      2) Print over Online-Ressource
      3) Has signature
      4) Newer: (year desc, edition_number desc)

    Args:
        swb_result: The edition currently held; the baseline for comparison.
        dnb_result: Candidate records to compare against the baseline.

    Returns:
        The qualifying candidates (one per PPN), best match first, or ``None``
        when no candidate qualifies.
    """

    def as_int(value):
        # Normalise year / edition values so they can be compared and sorted
        # together; strings would otherwise compare lexicographically and
        # clash with the integer "unknown" sentinel used for sorting.
        if value is None:
            return None
        if isinstance(value, int):
            return value
        digits = re.sub(r"\D", "", str(value))
        return int(digits) if digits else None

    def strip_copy_and_edition(s: str) -> str:
        s = re.sub(r"\(\s*\d+\s*\)", "", s)  # remove '(N)'
        s = re.sub(r"\s*\+\s*\d+\s*$", "", s)  # remove trailing '+N'
        return s

    def extract_rvk_root(sig: "Optional[str]") -> str:
        if not sig:
            return ""
        t = strip_copy_and_edition(sig.upper())
        t = re.sub(r"\s+", " ", t).strip()
        m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t)
        if not m:
            # No leading notation: fall back to the first token that is left
            # after dropping every character outside the RVK character set.
            cleaned = re.sub(rf"[^{RVK_ALLOWED} ]+", "", t).strip()
            return cleaned.split(" ")[0] if cleaned else ""
        return re.sub(r"\s+", " ", m.group(1)).strip()

    def has_sig(b: "BookData") -> bool:
        return bool(getattr(b, "signature", None))

    def is_online(b: "BookData") -> bool:
        return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource"

    def is_print(b: "BookData") -> bool:
        return not is_online(b)

    swb_has_sig = has_sig(swb_result)
    swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None))
    swb_year = as_int(swb_result.year)
    swb_edition = as_int(swb_result.edition_number)

    def rvk_matches_swb(b: "BookData") -> bool:
        if not has_sig(b) or not swb_has_sig:
            return False
        return extract_rvk_root(b.signature) == swb_rvk

    def strictly_newer(b: "BookData") -> bool:
        year = as_int(b.year)
        edition = as_int(b.edition_number)
        both_years_known = year is not None and swb_year is not None
        # Hard guard: if both years are known and candidate is older, discard
        if both_years_known and year < swb_year:
            return False
        newer_by_year = both_years_known and year > swb_year
        newer_by_edition = (
            edition is not None
            and swb_edition is not None
            and edition > swb_edition
        )
        # Thanks to the guard above, newer_by_edition can't pick something
        # with a smaller year.
        return newer_by_year or newer_by_edition

    # 1) Filter: same work (by RVK if both have sigs) AND strictly newer
    candidates = []
    for b in dnb_result:
        if has_sig(b) and swb_has_sig and extract_rvk_root(b.signature) != swb_rvk:
            continue  # different work
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN -> prefer (rvk-match, is-print, has-signature)
    def pref_score(x: "BookData") -> tuple:
        return (
            1 if rvk_matches_swb(x) else 0,
            1 if is_print(x) else 0,
            1 if has_sig(x) else 0,
        )

    by_ppn = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None or pref_score(b) > pref_score(prev):
            by_ppn[key] = b

    deduped = list(by_ppn.values())
    if not deduped:
        return None

    # 3) Preserve all qualifying newer editions, but order by preference;
    #    unknown year / edition sort last within the same preference.
    def sort_key(b: "BookData") -> tuple:
        year = as_int(b.year)
        edition = as_int(b.edition_number)
        return pref_score(b) + (
            year if year is not None else -1,
            edition if edition is not None else -1,
        )

    deduped.sort(key=sort_key, reverse=True)
    return deduped
|
||||
|
||||
|
||||
class NewEditionCheckerThread(QThread):
    """Looks up newer editions for a list of books using a small thread pool.

    The entries are split into chunks, each chunk is handled by one pool
    worker, and the workers report back through a queue that ``run`` drains to
    emit progress, rate and ETA signals. The collected results are emitted
    through ``resultsSignal`` when all workers are done.
    """

    updateSignal = Signal(int, int)  # (processed, total)
    updateProgress = Signal(int, int)  # (processed, total)
    total_entries_signal = Signal(int)
    resultsSignal = Signal(list)  # list[tuple[BookData, list[BookData]]]

    # metrics signals
    rateSignal = Signal(float)  # items per second ("it/s")
    etaSignal = Signal(int)  # seconds remaining (-1 when unknown)

    def __init__(self, entries: Optional[list["BookData"]] = None, parent=None):
        super().__init__(parent)
        self.entries: list["BookData"] = entries if entries is not None else []
        self.results: list[tuple["BookData", list["BookData"]]] = []

    def reset(self):
        """Drop the current entries and any collected results."""
        self.entries = []
        self.results = []

    # ---------- internal helpers ----------

    @staticmethod
    def _split_evenly(items: list, parts: int) -> list[list]:
        """Split items as evenly as possible into `parts` chunks (no empty tails)."""
        if parts <= 1 or len(items) <= 1:
            return [items]
        base, extra = divmod(len(items), parts)
        chunks = []
        start = 0
        for k in range(parts):
            # The first `extra` chunks take one additional item each.
            size = base + (1 if k < extra else 0)
            if size == 0:
                continue
            chunks.append(items[start : start + size])
            start += size
        return chunks

    @staticmethod
    def _clean_title(raw: str) -> str:
        """Strip trailing punctuation and any parenthesised part from a title."""
        title = raw.rstrip(" .:,;!?")
        title = re.sub(r"\s*\(.*\)", "", title)
        return title.strip()

    @classmethod
    def _process_book(
        cls, book: "BookData"
    ) -> tuple["BookData", list["BookData"]] | None:
        """Process one book; returns (original, [found editions]) or None.

        None is returned when the book has no title, when the record cannot be
        found by its PPN, or when no newer edition is found.
        """
        if not book.title:
            return None

        # Baseline: the record for this PPN, restricted by ``pica.bib=20735``.
        held = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])
        if not held:
            log.debug(f"No SWB record found for PPN {book.ppn}")
            return None
        swb_result = held[0]

        # Candidates: everything with the same title and publisher.
        dnb_results = swb.getBooks(
            [
                f"pica.tit={book.title}",
                f"pica.vlg={book.publisher}",
            ]
        )
        new_editions = find_newer_edition(swb_result, dnb_results)
        if not new_editions:
            return None

        for new_edition in new_editions:
            new_edition.library_location = cat.get_location(new_edition.ppn)
            try:
                isbn = (
                    str(new_edition.isbn[0])
                    if isinstance(new_edition.isbn, list)
                    else str(new_edition.isbn)
                )
                new_edition.link = (
                    f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}"
                )
            except (IndexError, TypeError):
                # No usable ISBN: leave the link as it is.
                log.debug(f"No ISBN available for PPN {new_edition.ppn}")
            new_edition.in_library = cat.in_library(new_edition.ppn)

        return (book, new_editions)

    @classmethod
    def _worker(cls, items: list["BookData"], q: Queue) -> None:
        """Worker for one chunk; pushes ('result', ...), ('progress', 1), and ('done', None)."""
        try:
            for book in items:
                try:
                    result = cls._process_book(book)
                except Exception:
                    # Best effort: one failing lookup must not abort the
                    # chunk, but it should leave a trace in the log.
                    log.exception(
                        f"Edition lookup failed for PPN {getattr(book, 'ppn', None)}"
                    )
                    result = None
                if result is not None:
                    q.put(("result", result))
                q.put(("progress", 1))
        finally:
            # Always signal completion so run() cannot wait forever.
            q.put(("done", None))

    # ---------- thread entry point ----------

    def run(self):
        """Distribute the entries over pool workers and relay their progress."""
        total = len(self.entries)
        self.total_entries_signal.emit(total)

        # start timer for metrics
        t0 = monotonic()

        if total == 0:
            log.debug("No entries to process.")
            # emit metrics (zero work)
            self.rateSignal.emit(0.0)
            self.etaSignal.emit(0)
            self.resultsSignal.emit([])
            return

        # One worker per THREAD_MIN_ITEMS entries (rounded up), at most
        # THREAD_COUNT workers.
        num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
        chunks = self._split_evenly(self.entries, num_workers)
        sizes = [len(ch) for ch in chunks]

        q: Queue = Queue()
        processed = 0
        finished_workers = 0

        with ThreadPoolExecutor(max_workers=len(chunks)) as ex:
            futures = [ex.submit(self._worker, ch, q) for ch in chunks]

            log.info(
                f"Launched {len(futures)} worker thread(s) for {total} entries: {sizes} entries per thread."
            )
            for idx, sz in enumerate(sizes, 1):
                log.debug(f"Thread {idx}: {sz} entries")

            # Aggregate progress/results until every worker reported 'done'.
            while finished_workers < len(chunks):
                try:
                    kind, payload = q.get(timeout=0.1)
                except Empty:
                    continue

                if kind == "progress":
                    processed += int(payload)
                    self.updateSignal.emit(processed, total)
                    self.updateProgress.emit(processed, total)

                    # compute & emit metrics
                    elapsed = max(1e-9, monotonic() - t0)
                    rate = processed / elapsed  # items per second
                    remaining = max(0, total - processed)
                    eta_sec = int(round(remaining / rate)) if rate > 0 else -1

                    self.rateSignal.emit(rate)
                    self.etaSignal.emit(eta_sec)
                elif kind == "result":
                    self.results.append(payload)
                elif kind == "done":
                    finished_workers += 1

        # Final metrics on completion
        elapsed_total = max(1e-9, monotonic() - t0)
        self.rateSignal.emit(total / elapsed_total)
        self.etaSignal.emit(0)

        self.resultsSignal.emit(self.results)
|
||||
30
src/core/__init__.py
Normal file
30
src/core/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Core domain models and business constants."""
|
||||
|
||||
from .models import (
|
||||
Apparat,
|
||||
ApparatData,
|
||||
Book,
|
||||
BookData,
|
||||
ELSA,
|
||||
MailData,
|
||||
Prof,
|
||||
SemapDocument,
|
||||
Subjects,
|
||||
XMLMailSubmission,
|
||||
)
|
||||
from .constants import *
|
||||
from .semester import Semester
|
||||
|
||||
__all__ = [
|
||||
"Apparat",
|
||||
"ApparatData",
|
||||
"Book",
|
||||
"BookData",
|
||||
"ELSA",
|
||||
"MailData",
|
||||
"Prof",
|
||||
"SemapDocument",
|
||||
"Subjects",
|
||||
"XMLMailSubmission",
|
||||
"Semester",
|
||||
]
|
||||
213
src/core/constants.py
Normal file
213
src/core/constants.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Valid apparat numbers: 1 through 180 inclusive.
APP_NRS = list(range(1, 181))
|
||||
|
||||
PROF_TITLES = [
|
||||
"Dr. mult.",
|
||||
"Dr. paed.",
|
||||
"Dr. rer. pol.",
|
||||
"Dr. sc. techn.",
|
||||
"Drs.",
|
||||
"Dr. agr.",
|
||||
"Dr. habil.",
|
||||
"Dr. oec.",
|
||||
"Dr. med.",
|
||||
"Dr. e. h.",
|
||||
"Dr. oec. publ.",
|
||||
"Dr. -Ing.",
|
||||
"Dr. theol.",
|
||||
"Dr. med. vet.",
|
||||
"Dr. ing.",
|
||||
"Dr. rer. nat.",
|
||||
"Dr. des.",
|
||||
"Dr. sc. mus.",
|
||||
"Dr. h. c.",
|
||||
"Dr. pharm.",
|
||||
"Dr. med. dent.",
|
||||
"Dr. phil. nat.",
|
||||
"Dr. phil.",
|
||||
"Dr. iur.",
|
||||
"Dr.",
|
||||
"Kein Titel",
|
||||
]
|
||||
|
||||
SEMAP_MEDIA_ACCOUNTS = {
|
||||
1: "1008000055",
|
||||
2: "1008000188",
|
||||
3: "1008000211",
|
||||
4: "1008000344",
|
||||
5: "1008000477",
|
||||
6: "1008000500",
|
||||
7: "1008000633",
|
||||
8: "1008000766",
|
||||
9: "1008000899",
|
||||
10: "1008000922",
|
||||
11: "1008001044",
|
||||
12: "1008001177",
|
||||
13: "1008001200",
|
||||
14: "1008001333",
|
||||
15: "1008001466",
|
||||
16: "1008001599",
|
||||
17: "1008001622",
|
||||
18: "1008001755",
|
||||
19: "1008001888",
|
||||
20: "1008001911",
|
||||
21: "1008002033",
|
||||
22: "1008002166",
|
||||
23: "1008002299",
|
||||
24: "1008002322",
|
||||
25: "1008002455",
|
||||
26: "1008002588",
|
||||
27: "1008002611",
|
||||
28: "1008002744",
|
||||
29: "1008002877",
|
||||
30: "1008002900",
|
||||
31: "1008003022",
|
||||
32: "1008003155",
|
||||
33: "1008003288",
|
||||
34: "1008003311",
|
||||
35: "1008003444",
|
||||
36: "1008003577",
|
||||
37: "1008003600",
|
||||
38: "1008003733",
|
||||
39: "1008003866",
|
||||
40: "1008003999",
|
||||
41: "1008004011",
|
||||
42: "1008004144",
|
||||
43: "1008004277",
|
||||
44: "1008004300",
|
||||
45: "1008004433",
|
||||
46: "1008004566",
|
||||
47: "1008004699",
|
||||
48: "1008004722",
|
||||
49: "1008004855",
|
||||
50: "1008004988",
|
||||
51: "1008005000",
|
||||
52: "1008005133",
|
||||
53: "1008005266",
|
||||
54: "1008005399",
|
||||
55: "1008005422",
|
||||
56: "1008005555",
|
||||
57: "1008005688",
|
||||
58: "1008005711",
|
||||
59: "1008005844",
|
||||
60: "1008005977",
|
||||
61: "1008006099",
|
||||
62: "1008006122",
|
||||
63: "1008006255",
|
||||
64: "1008006388",
|
||||
65: "1008006411",
|
||||
66: "1008006544",
|
||||
67: "1008006677",
|
||||
68: "1008006700",
|
||||
69: "1008006833",
|
||||
70: "1008006966",
|
||||
71: "1008007088",
|
||||
72: "1008007111",
|
||||
73: "1008007244",
|
||||
74: "1008007377",
|
||||
75: "1008007400",
|
||||
76: "1008007533",
|
||||
77: "1008007666",
|
||||
78: "1008007799",
|
||||
79: "1008007822",
|
||||
80: "1008007955",
|
||||
81: "1008008077",
|
||||
82: "1008008100",
|
||||
83: "1008008233",
|
||||
84: "1008008366",
|
||||
85: "1008008499",
|
||||
86: "1008008522",
|
||||
87: "1008008655",
|
||||
88: "1008008788",
|
||||
89: "1008008811",
|
||||
90: "1008008944",
|
||||
91: "1008009066",
|
||||
92: "1008009199",
|
||||
93: "1008009222",
|
||||
94: "1008009355",
|
||||
95: "1008009488",
|
||||
96: "1008009511",
|
||||
97: "1008009644",
|
||||
98: "1008009777",
|
||||
99: "1008009800",
|
||||
100: "1008009933",
|
||||
101: "1008010022",
|
||||
102: "1008010155",
|
||||
103: "1008010288",
|
||||
104: "1008010311",
|
||||
105: "1008010444",
|
||||
106: "1008010577",
|
||||
107: "1008010600",
|
||||
108: "1008010733",
|
||||
109: "1008010866",
|
||||
110: "1008010999",
|
||||
111: "1008011011",
|
||||
112: "1008011144",
|
||||
113: "1008011277",
|
||||
114: "1008011300",
|
||||
115: "1008011433",
|
||||
116: "1008011566",
|
||||
117: "1008011699",
|
||||
118: "1008011722",
|
||||
119: "1008011855",
|
||||
120: "1008011988",
|
||||
121: "1008012000",
|
||||
122: "1008012133",
|
||||
123: "1008012266",
|
||||
124: "1008012399",
|
||||
125: "1008012422",
|
||||
126: "1008012555",
|
||||
127: "1008012688",
|
||||
128: "1008012711",
|
||||
129: "1008012844",
|
||||
130: "1008012977",
|
||||
131: "1008013099",
|
||||
132: "1008013122",
|
||||
133: "1008013255",
|
||||
134: "1008013388",
|
||||
135: "1008013411",
|
||||
136: "1008013544",
|
||||
137: "1008013677",
|
||||
138: "1008013700",
|
||||
139: "1008013833",
|
||||
140: "1008013966",
|
||||
141: "1008014088",
|
||||
142: "1008014111",
|
||||
143: "1008014244",
|
||||
144: "1008014377",
|
||||
145: "1008014400",
|
||||
146: "1008014533",
|
||||
147: "1008014666",
|
||||
148: "1008014799",
|
||||
149: "1008014822",
|
||||
150: "1008014955",
|
||||
151: "1008015077",
|
||||
152: "1008015100",
|
||||
153: "1008015233",
|
||||
154: "1008015366",
|
||||
155: "1008015499",
|
||||
156: "1008015522",
|
||||
157: "1008015655",
|
||||
158: "1008015788",
|
||||
159: "1008015811",
|
||||
160: "1008015944",
|
||||
161: "1008016066",
|
||||
162: "1008016199",
|
||||
163: "1008016222",
|
||||
164: "1008016355",
|
||||
165: "1008016488",
|
||||
166: "1008016511",
|
||||
167: "1008016644",
|
||||
168: "1008016777",
|
||||
169: "1008016800",
|
||||
170: "1008016933",
|
||||
171: "1008017055",
|
||||
172: "1008017188",
|
||||
173: "1008017211",
|
||||
174: "1008017344",
|
||||
175: "1008017477",
|
||||
176: "1008017500",
|
||||
177: "1008017633",
|
||||
178: "1008017766",
|
||||
179: "1008017899",
|
||||
180: "1008017922",
|
||||
}
|
||||
410
src/core/models.py
Normal file
410
src/core/models.py
Normal file
@@ -0,0 +1,410 @@
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import regex
|
||||
|
||||
from src.logic.openai import name_tester, run_shortener, semester_converter
|
||||
from src.logic.semester import Semester
|
||||
|
||||
|
||||
@dataclass
class Prof:
    """A professor record; every field is optional and defaults to ``None``."""

    id: Optional[int] = None
    _title: Optional[str] = None  # raw title; read it through ``title``
    firstname: Optional[str] = None
    lastname: Optional[str] = None
    fullname: Optional[str] = None  # ``name`` expects "Lastname, Firstname"
    mail: Optional[str] = None
    telnr: Optional[str] = None

    def from_dict(self, data: dict[str, Union[str, int]]):
        """Copy every item of ``data`` whose key is an existing attribute.

        Unknown keys are ignored. Returns ``self`` to allow chaining.
        """
        for key, value in data.items():
            if hasattr(self, key):
                setattr(self, key, value)
        return self

    @property
    def title(self) -> str:
        """The title, or ``""`` when it is unset or the literal string "None"."""
        if self._title is None or self._title == "None":
            return ""
        return self._title

    @title.setter
    def title(self, value: str):
        self._title = value

    def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof":
        """Fill the record from a 7-tuple.

        Expected order: id, title, firstname, lastname, fullname, mail, telnr.
        Returns ``self`` to allow chaining.
        """
        (
            self.id,
            self._title,
            self.firstname,
            self.lastname,
            self.fullname,
            self.mail,
            self.telnr,
        ) = data[:7]
        return self

    def name(self, comma: bool = False) -> Optional[str]:
        """Return "Lastname Firstname" (or "Lastname, Firstname" if ``comma``).

        When neither first nor last name is set they are derived from a
        ``fullname`` of the form "Lastname, Firstname" and stored on the
        instance. A ``fullname`` without a comma -- or no ``fullname`` at
        all -- is returned unchanged.
        """
        if self.firstname is None and self.lastname is None:
            # Guard against a missing fullname: ``"," in None`` would raise.
            if self.fullname is None or "," not in self.fullname:
                return self.fullname
            parts = self.fullname.split(",")
            self.firstname = parts[1].strip()
            self.lastname = parts[0].strip()

        if comma:
            return f"{self.lastname}, {self.firstname}"
        return f"{self.lastname} {self.firstname}"
|
||||
|
||||
|
||||
@dataclass
|
||||
class BookData:
|
||||
ppn: str | None = None
|
||||
title: str | None = None
|
||||
signature: str | None = None
|
||||
edition: str | None = None
|
||||
link: str | None = None
|
||||
isbn: Union[str, list[str], None] = field(default_factory=list)
|
||||
author: str | None = None
|
||||
language: Union[str, list[str], None] = field(default_factory=list)
|
||||
publisher: str | None = None
|
||||
place: str | None = None
|
||||
year: int | None = None
|
||||
pages: str | None = None
|
||||
library_location: str | None = None
|
||||
in_apparat: bool | None = False
|
||||
adis_idn: str | None = None
|
||||
old_book: Any | None = None
|
||||
media_type: str | None = None #
|
||||
in_library: bool | None = None # whether the book is in the library or not
|
||||
medianr: int | None = None # Media number in the library system
|
||||
|
||||
def __post_init__(self):
|
||||
self.library_location = (
|
||||
str(self.library_location) if self.library_location else None
|
||||
)
|
||||
if isinstance(self.language, list) and self.language:
|
||||
self.language = [lang.strip() for lang in self.language if lang.strip()]
|
||||
self.language = ",".join(self.language)
|
||||
self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
|
||||
self.in_library = True if self.signature else False
|
||||
|
||||
def from_dict(self, data: dict) -> "BookData":
|
||||
for key, value in data.items():
|
||||
setattr(self, key, value)
|
||||
return self
|
||||
|
||||
def merge(self, other: "BookData") -> "BookData":
|
||||
for key, value in other.__dict__.items():
|
||||
# merge lists, if the attribute is a list, extend it
|
||||
if isinstance(value, list):
|
||||
current_value = getattr(self, key)
|
||||
if current_value is None:
|
||||
current_value = []
|
||||
elif not isinstance(current_value, list):
|
||||
current_value = [current_value]
|
||||
# extend the list with the new values, but only if they are not already in the list
|
||||
for v in value:
|
||||
if v not in current_value:
|
||||
current_value.append(v)
|
||||
setattr(self, key, current_value)
|
||||
if value is not None and (
|
||||
getattr(self, key) is None or getattr(self, key) == ""
|
||||
):
|
||||
setattr(self, key, value)
|
||||
# in language, drop all entries that are longer than 3 characters
|
||||
if isinstance(self.language, list):
|
||||
self.language = [lang for lang in self.language if len(lang) <= 4]
|
||||
return self
|
||||
|
||||
@property
|
||||
def to_dict(self) -> str:
|
||||
"""Convert the dataclass to a dictionary."""
|
||||
data_dict = {
|
||||
key: value for key, value in self.__dict__.items() if value is not None
|
||||
}
|
||||
# remove old_book from data_dict
|
||||
if "old_book" in data_dict:
|
||||
del data_dict["old_book"]
|
||||
return json.dumps(data_dict, ensure_ascii=False)
|
||||
|
||||
def from_dataclass(self, dataclass: Optional[Any]) -> None:
|
||||
if dataclass is None:
|
||||
return
|
||||
for key, value in dataclass.__dict__.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def get_book_type(self) -> str:
|
||||
if "Online" in self.pages:
|
||||
return "eBook"
|
||||
else:
|
||||
return "Druckausgabe"
|
||||
|
||||
def from_string(self, data: str) -> "BookData":
|
||||
ndata = json.loads(data)
|
||||
|
||||
return BookData(**ndata)
|
||||
|
||||
def from_LehmannsSearchResult(self, result: Any) -> "BookData":
|
||||
self.title = result.title
|
||||
self.author = "; ".join(result.authors) if result.authors else None
|
||||
self.edition = str(result.edition) if result.edition else None
|
||||
self.link = result.url
|
||||
self.isbn = (
|
||||
result.isbn13
|
||||
if isinstance(result.isbn13, list)
|
||||
else [result.isbn13]
|
||||
if result.isbn13
|
||||
else []
|
||||
)
|
||||
self.pages = str(result.pages) if result.pages else None
|
||||
self.publisher = result.publisher
|
||||
self.year = str(result.year) if result.year else None
|
||||
# self.pages = str(result.pages) if result.pages else None
|
||||
return self
|
||||
|
||||
@property
|
||||
def edition_number(self) -> Optional[int]:
|
||||
if self.edition is None:
|
||||
return 0
|
||||
match = regex.search(r"(\d+)", self.edition)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
return 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class MailData:
|
||||
subject: Optional[str] = None
|
||||
body: Optional[str] = None
|
||||
mailto: Optional[str] = None
|
||||
prof: Optional[str] = None
|
||||
|
||||
|
||||
class Subjects(Enum):
    """Closed list of subjects; each member's value is ``(id, German label)``."""

    BIOLOGY = (1, "Biologie")
    CHEMISTRY = (2, "Chemie")
    GERMAN = (3, "Deutsch")
    ENGLISH = (4, "Englisch")
    PEDAGOGY = (5, "Erziehungswissenschaft")
    FRENCH = (6, "Französisch")
    GEOGRAPHY = (7, "Geographie")
    HISTORY = (8, "Geschichte")
    HEALTH_EDUCATION = (9, "Gesundheitspädagogik")
    HTW = (10, "Haushalt / Textil")
    ART = (11, "Kunst")
    MATH_IT = (12, "Mathematik / Informatik")
    MEDIAPEDAGOGY = (13, "Medien in der Bildung")
    MUSIC = (14, "Musik")
    PHILOSOPHY = (15, "Philosophie")
    PHYSICS = (16, "Physik")
    POLITICS = (17, "Politikwissenschaft")
    PRORECTORATE = (18, "Prorektorat Lehre und Studium")
    PSYCHOLOGY = (19, "Psychologie")
    SOCIOLOGY = (20, "Soziologie")
    SPORT = (21, "Sport")
    TECHNIC = (22, "Technik")
    THEOLOGY = (23, "Theologie")
    ECONOMICS = (24, "Wirtschaftslehre")

    @property
    def id(self) -> int:
        """Numeric id, i.e. the first element of the member value."""
        numeric_id, _label = self.value
        return numeric_id

    @property
    def name(self) -> str:
        """German label (second element of the value); shadows ``Enum.name``."""
        _numeric_id, label = self.value
        return label

    @classmethod
    def get_index(cls, name: str) -> Optional[int]:
        """Return the zero-based index (``id - 1``) of the subject labelled *name*.

        Returns ``None`` when no member carries that label.
        """
        return next(
            (subject.id - 1 for subject in cls if subject.name == name),
            None,
        )
|
||||
|
||||
|
||||
@dataclass
|
||||
class Apparat:
|
||||
id: int | None = None
|
||||
name: str | None = None
|
||||
prof_id: int | None = None
|
||||
subject: str | None = None
|
||||
appnr: int | None = None
|
||||
created_semester: str | None = None
|
||||
extended_at: str | None = None
|
||||
eternal: bool = False
|
||||
extend_until: str | None = None
|
||||
deleted: int | None = None
|
||||
deleted_date: str | None = None
|
||||
apparat_id_adis: str | None = None
|
||||
prof_id_adis: str | None = None
|
||||
konto: int | None = None
|
||||
|
||||
def from_tuple(self, data: tuple[Any, ...]) -> "Apparat":
|
||||
self.id = data[0]
|
||||
self.name = data[1]
|
||||
self.prof_id = data[2]
|
||||
self.subject = data[3]
|
||||
self.appnr = data[4]
|
||||
self.created_semester = data[5]
|
||||
self.extended_at = data[6]
|
||||
self.eternal = data[7]
|
||||
self.extend_until = data[8]
|
||||
self.deleted = data[9]
|
||||
self.deleted_date = data[10]
|
||||
self.apparat_id_adis = data[11]
|
||||
self.prof_id_adis = data[12]
|
||||
self.konto = data[13]
|
||||
return self
|
||||
|
||||
@property
|
||||
def get_semester(self) -> Optional[str]:
|
||||
if self.extend_until is not None:
|
||||
return self.extend_until
|
||||
else:
|
||||
return self.created_semester
|
||||
|
||||
|
||||
@dataclass
|
||||
class ELSA:
|
||||
id: int | None = None
|
||||
date: str | None = None
|
||||
semester: str | None = None
|
||||
prof_id: int | None = None
|
||||
|
||||
def from_tuple(self, data: tuple[Any, ...]) -> "ELSA":
|
||||
self.id = data[0]
|
||||
self.date = data[1]
|
||||
self.semester = data[2]
|
||||
self.prof_id = data[3]
|
||||
return self
|
||||
|
||||
|
||||
@dataclass
class ApparatData:
    """Bundle of one ``Prof`` and one ``Apparat`` record.

    Both fields use ``default_factory`` so every instance gets its own fresh,
    empty ``Prof`` and ``Apparat`` objects.
    """

    prof: Prof = field(default_factory=Prof)
    apparat: Apparat = field(default_factory=Apparat)
|
||||
|
||||
|
||||
@dataclass
class XMLMailSubmission:
    """Container for the fields of a submission; every field defaults to ``None``."""

    name: str | None = None
    lastname: str | None = None
    title: str | None = None
    telno: int | None = None
    email: str | None = None
    app_name: str | None = None
    subject: str | None = None
    semester: Semester | None = None
    books: list[BookData] | None = None
|
||||
|
||||
|
||||
@dataclass
class Book:
    """One book entry, filled from a form-like mapping via :meth:`from_dict`.

    All fields are ``None`` until set; :meth:`from_dict` stores stripped
    strings (``""`` for blank cells).
    """

    author: Optional[str] = None
    year: Optional[str] = None
    edition: Optional[str] = None
    title: Optional[str] = None
    location: Optional[str] = None
    publisher: Optional[str] = None
    signature: Optional[str] = None
    internal_notes: Optional[str] = None

    @property
    def has_signature(self) -> bool:
        """``True`` when ``signature`` is neither ``None`` nor ``""``."""
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        """``True`` when every field is unset (``None``) or an empty string.

        Treating ``None`` like ``""`` keeps this consistent with
        :attr:`has_signature` and makes a record empty even if the source
        mapping lacked some of the expected keys.
        """
        return not any(
            (
                self.author,
                self.year,
                self.edition,
                self.title,
                self.location,
                self.publisher,
                self.signature,
                self.internal_notes,
            )
        )

    def from_dict(self, data: dict[str, Any]):
        """Populate the fields from *data*, keyed by the German form labels.

        Recognised keys: ``"Autorenname(n):Nachname, Vorname"``,
        ``"Jahr/Auflage"`` (split at ``/`` into year and edition), ``"Titel"``,
        ``"Ort und Verlag"`` (split at ``,`` into location and publisher),
        ``"Standnummer"`` and ``"Interne Vermerke"``. Other keys are ignored.
        ``None`` values are treated as empty strings.
        """
        for key, raw in data.items():
            # str.strip() also removes Unicode spaces such as U+2002, so
            # placeholder cells made of en-spaces collapse to "" here.
            value = "" if raw is None else str(raw).strip()

            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                parts = value.split("/")
                self.year = parts[0].strip()
                self.edition = parts[1].strip() if len(parts) > 1 else ""
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                parts = value.split(",")
                self.location = parts[0].strip()
                # Strip so "Berlin, Springer" yields "Springer", not " Springer".
                self.publisher = parts[1].strip() if len(parts) > 1 else ""
            elif key == "Standnummer":
                self.signature = value
            elif key == "Interne Vermerke":
                self.internal_notes = value
|
||||
|
||||
|
||||
@dataclass
class SemapDocument:
    """Data extracted from a semester-apparatus document.

    Besides the plain fields, the class offers two properties with side
    effects (``nameSetter`` and ``renameSemester``) that rewrite fields in
    place using the imported helpers ``name_tester``, ``run_shortener``,
    ``semester_converter`` and ``Semester``.
    """

    subject: str = None
    phoneNumber: int = None
    mail: str = None
    title: str = None
    title_suggestions: list[str] = None
    semester: Union[str, Semester] = None
    books: list[Book] = None
    eternal: bool = False
    personName: str = None
    personTitle: str = None
    # Not annotated, hence plain class attributes rather than dataclass fields.
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        # Always starts with an empty list; a value passed to the constructor
        # for title_suggestions is discarded here.
        self.title_suggestions = []

    @property
    def nameSetter(self):
        """Split ``personTitle`` into title and name and compute title suggestions.

        Accessing this property mutates the instance: it sets ``personName``
        (``"Last, First"``), possibly ``personTitle``, ``title_length``,
        ``title_max_length`` and ``title_suggestions``. Returns ``None``.
        """
        # name_tester is expected to return a mapping with the keys
        # "last_name", "first_name" and "title" (as used below).
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        if data["title"] is not None:
            title = data["title"]
            self.personTitle = title
        self.personName = name
        # Combined length: title + 3 characters + last name.
        # NOTE(review): raises TypeError if self.title is still None.
        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
        if self.title_length > 40:
            name_len = len(self.personName.split(",")[0])
            self.title_max_length = 38 - name_len
            # Each suggestion is expected to be a mapping with a
            # "shortened_string" entry.
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []
        pass

    @property
    def renameSemester(self) -> None:
        """Convert ``semester`` from a string into a ``Semester`` object in place.

        A value containing ``", Dauer"`` additionally sets ``eternal`` to
        ``True`` and only the part before the comma is parsed. Any other
        non-empty value is first passed through ``semester_converter``.
        A falsy ``semester`` is left untouched.
        """
        if self.semester:
            # NOTE(review): assumes self.semester is still a str at this point;
            # confirm the property is not accessed twice.
            if ", Dauer" in self.semester:
                self.semester = self.semester.split(",")[0]
                self.eternal = True
                self.semester = Semester().from_string(self.semester)
            else:
                self.semester = Semester().from_string(
                    semester_converter(self.semester)
                )

    @property
    def signatures(self) -> list[str]:
        """Return the signatures of all books that have one; ``[]`` without books."""
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []
|
||||
@@ -15,20 +15,13 @@ Key points
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
import loguru
|
||||
import sys
|
||||
from src import LOG_DIR
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
|
||||
# @dataclass
|
||||
class Semester:
|
||||
"""Represents a German university semester (WiSe or SoSe)."""
|
||||
|
||||
@@ -123,21 +116,22 @@ class Semester:
|
||||
# ------------------------------------------------------------------
|
||||
# Comparison helpers
|
||||
# ------------------------------------------------------------------
|
||||
def isPastSemester(self, other: "Semester") -> bool:
|
||||
if self.year < other.year:
|
||||
def isPastSemester(self, current: "Semester") -> bool:
|
||||
log.debug(f"Comparing {self} < {current}")
|
||||
if self.year < current.year:
|
||||
return True
|
||||
if self.year == other.year:
|
||||
if self.year == current.year:
|
||||
return (
|
||||
self.semester == "WiSe" and other.semester == "SoSe"
|
||||
self.semester == "WiSe" and current.semester == "SoSe"
|
||||
) # WiSe before next SoSe
|
||||
return False
|
||||
|
||||
def isFutureSemester(self, other: "Semester") -> bool:
|
||||
if self.year > other.year:
|
||||
def isFutureSemester(self, current: "Semester") -> bool:
|
||||
if self.year > current.year:
|
||||
return True
|
||||
if self.year == other.year:
|
||||
if self.year == current.year:
|
||||
return (
|
||||
self.semester == "SoSe" and other.semester == "WiSe"
|
||||
self.semester == "SoSe" and current.semester == "WiSe"
|
||||
) # SoSe after WiSe of same year
|
||||
return False
|
||||
|
||||
@@ -235,8 +229,20 @@ if __name__ == "__main__":
|
||||
s_start = Semester(6, "SoSe") # SoSe 6
|
||||
s_end = Semester(25, "WiSe") # WiSe 25/26
|
||||
chain = Semester.generate_missing(s_start, s_end)
|
||||
print("generate_missing:", [str(s) for s in chain])
|
||||
# print("generate_missing:", [str(s) for s in chain])
|
||||
|
||||
# Parsing demo ---------------------------------------------------------
|
||||
for label in ["SoSe 6", "WiSe 6/7", "wise 23/24", "WiSe 9"]:
|
||||
print("from_string:", label, "→", Semester.from_string(label))
|
||||
examples = [
|
||||
"SoSe 6",
|
||||
"WiSe 6/7",
|
||||
"WiSe 6",
|
||||
"SoSe 23",
|
||||
"WiSe 23/24",
|
||||
"WiSe 24",
|
||||
"WiSe 99/00",
|
||||
"SoSe 00",
|
||||
"WiSe 100/101", # test large year
|
||||
]
|
||||
for ex in examples:
|
||||
parsed = Semester.from_string(ex)
|
||||
print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})")
|
||||
5
src/database/__init__.py
Normal file
5
src/database/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Database layer for data persistence."""
|
||||
|
||||
from .connection import Database
|
||||
|
||||
__all__ = ["Database"]
|
||||
2008
src/database/connection.py
Normal file
2008
src/database/connection.py
Normal file
File diff suppressed because it is too large
Load Diff
132
src/database/migrations/V001__create_base_tables.sql
Normal file
132
src/database/migrations/V001__create_base_tables.sql
Normal file
@@ -0,0 +1,132 @@
|
||||
-- V001: create the base tables and lookup indices.
-- Every statement uses IF NOT EXISTS, so re-running the script does not fail
-- on objects that are already present. All statements run in one transaction.
BEGIN TRANSACTION;

-- semesterapparat: references prof(id) through prof_id.
CREATE TABLE IF NOT EXISTS semesterapparat (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT,
    prof_id INTEGER,
    fach TEXT,
    appnr INTEGER,
    erstellsemester TEXT,
    verlängert_am TEXT,
    dauer BOOLEAN,
    verlängerung_bis TEXT,
    deletion_status INTEGER,
    deleted_date TEXT,
    apparat_id_adis INTEGER,
    prof_id_adis INTEGER,
    konto INTEGER,
    FOREIGN KEY (prof_id) REFERENCES prof (id)
);

-- media: references prof(id) and semesterapparat(id); "deleted" defaults to 0.
CREATE TABLE IF NOT EXISTS media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    bookdata TEXT,
    app_id INTEGER,
    prof_id INTEGER,
    deleted INTEGER DEFAULT (0),
    available BOOLEAN,
    reservation BOOLEAN,
    FOREIGN KEY (prof_id) REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);

-- files: binary file content (fileblob) linked to semesterapparat and prof.
-- Note: unlike the other tables, id is declared without AUTOINCREMENT.
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    filename TEXT,
    fileblob BLOB,
    app_id INTEGER,
    filetyp TEXT,
    prof_id INTEGER REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);

-- messages: references user(id); created_at and remind_at default to
-- CURRENT_TIMESTAMP.
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    message TEXT NOT NULL,
    remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER NOT NULL,
    appnr INTEGER,
    FOREIGN KEY (user_id) REFERENCES user (id)
);

-- prof: fullname is mandatory and unique.
CREATE TABLE IF NOT EXISTS prof (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    titel TEXT,
    fname TEXT,
    lname TEXT,
    fullname TEXT NOT NULL UNIQUE,
    mail TEXT,
    telnr TEXT
);

-- user: username is unique; email is unique when present.
CREATE TABLE IF NOT EXISTS user (
    id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
    created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
    username TEXT NOT NULL UNIQUE,
    password TEXT NOT NULL,
    salt TEXT NOT NULL,
    role TEXT NOT NULL,
    email TEXT UNIQUE,
    name TEXT
);

-- subjects: unique subject names.
CREATE TABLE IF NOT EXISTS subjects (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE
);

-- elsa: prof_id is mandatory but declared without a foreign-key constraint.
CREATE TABLE IF NOT EXISTS elsa (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    date TEXT NOT NULL,
    semester TEXT NOT NULL,
    prof_id INTEGER NOT NULL
);

-- elsa_files: binary file content belonging to one elsa row.
CREATE TABLE IF NOT EXISTS elsa_files (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    filename TEXT NOT NULL,
    fileblob BLOB NOT NULL,
    elsa_id INTEGER NOT NULL,
    filetyp TEXT NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);

-- elsa_media: bibliographic columns for a media entry belonging to one elsa row.
CREATE TABLE IF NOT EXISTS elsa_media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    work_author TEXT,
    section_author TEXT,
    year TEXT,
    edition TEXT,
    work_title TEXT,
    chapter_title TEXT,
    location TEXT,
    publisher TEXT,
    signature TEXT,
    issue TEXT,
    pages TEXT,
    isbn TEXT,
    type TEXT,
    elsa_id INTEGER NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);

-- neweditions: links new book data to an existing media row (old_edition_id)
-- and to a semesterapparat row (for_apparat); "ordered" defaults to 0.
CREATE TABLE IF NOT EXISTS neweditions (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    new_bookdata TEXT,
    old_edition_id INTEGER,
    for_apparat INTEGER,
    ordered BOOLEAN DEFAULT (0),
    FOREIGN KEY (old_edition_id) REFERENCES media (id),
    FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
);

-- Helpful indices to speed up frequent lookups and joins
CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id);
CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted);
CREATE INDEX IF NOT EXISTS idx_media_available ON media(available);
CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr);

COMMIT;
|
||||
10
src/database/migrations/V002__create_table_webadis_login.sql
Normal file
10
src/database/migrations/V002__create_table_webadis_login.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
-- V002: add the webadis_login table (username/password pairs).
-- NOTE(review): password is a plain TEXT column; confirm whether the
-- application encrypts or hashes the value before inserting it.
BEGIN TRANSACTION;

CREATE TABLE IF NOT EXISTS webadis_login (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    username TEXT NOT NULL,
    password TEXT NOT NULL
);

COMMIT;
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Extend webadis_login with a nullable TEXT column "effective_range".
-- Requires the webadis_login table to exist already; ADD COLUMN has no
-- IF NOT EXISTS guard, so running this script twice fails.
BEGIN TRANSACTION;

ALTER TABLE webadis_login
ADD COLUMN effective_range TEXT;

COMMIT;
|
||||
112
src/database/schemas.py
Normal file
112
src/database/schemas.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# SQL DDL statements, one module-level constant per table.
# These statements use plain CREATE TABLE (no IF NOT EXISTS), so executing one
# against a database that already contains the table raises an error.

# semesterapparat: references prof(id) through prof_id.
CREATE_TABLE_APPARAT = """CREATE TABLE semesterapparat (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TEXT,
prof_id INTEGER,
fach TEXT,
appnr INTEGER,
erstellsemester TEXT,
verlängert_am TEXT,
dauer BOOLEAN,
verlängerung_bis TEXT,
deletion_status INTEGER,
deleted_date TEXT,
apparat_id_adis INTEGER,
prof_id_adis INTEGER,
konto INTEGER,
FOREIGN KEY (prof_id) REFERENCES prof (id)
)"""
# media: references prof(id) and semesterapparat(id); "deleted" defaults to 0.
CREATE_TABLE_MEDIA = """CREATE TABLE media (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
bookdata TEXT,
app_id INTEGER,
prof_id INTEGER,
deleted INTEGER DEFAULT (0),
available BOOLEAN,
reservation BOOLEAN,
FOREIGN KEY (prof_id) REFERENCES prof (id),
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""

# files: binary file content linked to semesterapparat and prof.
CREATE_TABLE_FILES = """CREATE TABLE files (
id INTEGER PRIMARY KEY,
filename TEXT,
fileblob BLOB,
app_id INTEGER,
filetyp TEXT,
prof_id INTEGER REFERENCES prof (id),
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""
# messages: references user(id); timestamps default to CURRENT_TIMESTAMP.
CREATE_TABLE_MESSAGES = """CREATE TABLE messages (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
message TEXT NOT NULL,
remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
user_id INTEGER NOT NULL,
appnr INTEGER,
FOREIGN KEY (user_id) REFERENCES user (id)
)"""
# prof: fullname is mandatory and unique.
CREATE_TABLE_PROF = """CREATE TABLE prof (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
titel TEXT,
fname TEXT,
lname TEXT,
fullname TEXT NOT NULL UNIQUE,
mail TEXT,
telnr TEXT
)"""
# user: username is unique; email is unique when present.
CREATE_TABLE_USER = """CREATE TABLE user (
id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
username TEXT NOT NULL UNIQUE,
password TEXT NOT NULL,
salt TEXT NOT NULL,
role TEXT NOT NULL,
email TEXT UNIQUE,
name TEXT
)"""
# subjects: unique subject names.
CREATE_TABLE_SUBJECTS = """CREATE TABLE subjects (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TEXT NOT NULL UNIQUE
)"""

# elsa: prof_id is mandatory but has no foreign-key constraint.
CREATE_ELSA_TABLE = """CREATE TABLE elsa (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
date TEXT NOT NULL,
semester TEXT NOT NULL,
prof_id INTEGER NOT NULL
)"""
# elsa_files: binary file content belonging to one elsa row.
CREATE_ELSA_FILES_TABLE = """CREATE TABLE elsa_files (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
filename TEXT NOT NULL,
fileblob BLOB NOT NULL,
elsa_id INTEGER NOT NULL,
filetyp TEXT NOT NULL,
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
# elsa_media: bibliographic columns for a media entry of one elsa row.
CREATE_ELSA_MEDIA_TABLE = """CREATE TABLE elsa_media (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
work_author TEXT,
section_author TEXT,
year TEXT,
edition TEXT,
work_title TEXT,
chapter_title TEXT,
location TEXT,
publisher TEXT,
signature TEXT,
issue TEXT,
pages TEXT,
isbn TEXT,
type TEXT,
elsa_id INTEGER NOT NULL,
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
# neweditions: links new book data to media(id) and semesterapparat(id).
CREATE_TABLE_NEWEDITIONS = """CREATE TABLE neweditions (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
new_bookdata TEXT,
old_edition_id INTEGER,
for_apparat INTEGER,
ordered BOOLEAN DEFAULT (0),
FOREIGN KEY (old_edition_id) REFERENCES media (id),
FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
)"""
|
||||
2
src/documents/__init__.py
Normal file
2
src/documents/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
|
||||
371
src/documents/generators.py
Normal file
371
src/documents/generators.py
Normal file
@@ -0,0 +1,371 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from os.path import basename
|
||||
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from docx.shared import Cm, Pt, RGBColor
|
||||
|
||||
from src import settings
|
||||
from src.shared.logging import log
|
||||
|
||||
logger = log
|
||||
|
||||
font = "Cascadia Mono"
|
||||
|
||||
|
||||
def print_document(file: str) -> None:
    """Send *file* as an e-mail attachment to the configured printer address.

    Connection data, credentials, sender and printer address are read from
    ``settings.mail``. The message body and subject are a single ``"."``.

    Args:
        file: Path of the file to attach; only its base name is used as the
            attachment name.

    Raises:
        OSError: If *file* cannot be read or the connection fails.
        smtplib.SMTPException: If login or delivery is rejected by the server.
    """
    # send document to printer as attachment of email
    import smtplib
    from email.mime.application import MIMEApplication
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    smtp = settings.mail.smtp_server
    port = settings.mail.port
    sender_email = settings.mail.sender
    password = settings.mail.password
    receiver = settings.mail.printer_mail

    message = MIMEMultipart()
    message["From"] = sender_email
    message["To"] = receiver
    # NOTE(review): the Cc header is set, but sendmail() below is only given
    # `receiver` as envelope recipient, so the sender gets no copy — confirm
    # whether that is intended.
    message["cc"] = settings.mail.sender
    message["Subject"] = "."
    mail_body = "."
    message.attach(MIMEText(mail_body, "html"))

    attachment_name = basename(file)
    with open(file, "rb") as fil:
        part = MIMEApplication(fil.read(), Name=attachment_name)
    part["Content-Disposition"] = 'attachment; filename="%s"' % attachment_name
    message.attach(part)
    mail = message.as_string()

    # SMTP_SSL(host, port) already connects during construction and the
    # context manager issues QUIT on exit, so no explicit connect()/quit().
    with smtplib.SMTP_SSL(smtp, port) as server:
        server.login(settings.mail.user_name, password)
        server.sendmail(sender_email, receiver, mail)
    log.success("Mail sent")
|
||||
|
||||
|
||||
class SemesterError(Exception):
    """Exception for semester-related problems; logs its message when created."""

    def __init__(self, message: str):
        """Store *message* as the only exception argument and log it as an error."""
        super().__init__(message)
        log.error(message)

    def __str__(self):
        """Return the message prefixed with ``"SemesterError: "``."""
        detail = self.args[0]
        return "SemesterError: {}".format(detail)
|
||||
|
||||
|
||||
class SemesterDocument:
|
||||
def __init__(
|
||||
self,
|
||||
apparats: list[tuple[int, str]],
|
||||
semester: str,
|
||||
filename: str,
|
||||
full: bool = False,
|
||||
):
|
||||
assert isinstance(apparats, list), SemesterError(
|
||||
"Apparats must be a list of tuples"
|
||||
)
|
||||
assert all(isinstance(apparat, tuple) for apparat in apparats), SemesterError(
|
||||
"Apparats must be a list of tuples"
|
||||
)
|
||||
assert all(isinstance(apparat[0], int) for apparat in apparats), SemesterError(
|
||||
"Apparat numbers must be integers"
|
||||
)
|
||||
assert all(isinstance(apparat[1], str) for apparat in apparats), SemesterError(
|
||||
"Apparat names must be strings"
|
||||
)
|
||||
assert isinstance(semester, str), SemesterError("Semester must be a string")
|
||||
assert "." not in filename and isinstance(filename, str), SemesterError(
|
||||
"Filename must be a string and not contain an extension"
|
||||
)
|
||||
self.doc = Document()
|
||||
self.apparats = apparats
|
||||
self.semester = semester
|
||||
self.table_font_normal = font
|
||||
self.table_font_bold = font
|
||||
self.header_font = font
|
||||
self.header_font_size = Pt(26)
|
||||
self.sub_header_font_size = Pt(18)
|
||||
self.table_font_size = Pt(10)
|
||||
self.color_red = RGBColor(255, 0, 0)
|
||||
self.color_blue = RGBColor(0, 0, 255)
|
||||
self.filename = filename
|
||||
if full:
|
||||
log.info("Full document generation")
|
||||
self.cleanup
|
||||
log.info("Cleanup done")
|
||||
self.make_document()
|
||||
log.info("Document created")
|
||||
self.create_pdf()
|
||||
log.info("PDF created")
|
||||
print_document(self.filename + ".pdf")
|
||||
log.info("Document printed")
|
||||
|
||||
def set_table_border(self, table):
|
||||
"""
|
||||
Adds a full border to the table.
|
||||
|
||||
:param table: Table object to which the border will be applied.
|
||||
"""
|
||||
tbl = table._element
|
||||
tbl_pr = tbl.xpath("w:tblPr")[0]
|
||||
tbl_borders = OxmlElement("w:tblBorders")
|
||||
|
||||
# Define border styles
|
||||
for border_name in ["top", "left", "bottom", "right", "insideH", "insideV"]:
|
||||
border = OxmlElement(f"w:{border_name}")
|
||||
border.set(qn("w:val"), "single")
|
||||
border.set(qn("w:sz"), "4") # Thickness of the border
|
||||
border.set(qn("w:space"), "0")
|
||||
border.set(qn("w:color"), "000000") # Black color
|
||||
tbl_borders.append(border)
|
||||
|
||||
tbl_pr.append(tbl_borders)
|
||||
|
||||
def create_sorted_table(self) -> None:
|
||||
# Sort the apparats list by the string in the tuple (index 1)
|
||||
self.apparats.sort(key=lambda x: x[1])
|
||||
# Create a table with rows equal to the length of the apparats list
|
||||
table = self.doc.add_table(rows=len(self.apparats), cols=2)
|
||||
table.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
||||
|
||||
# Set column widths by directly modifying the cell properties
|
||||
widths = [Cm(1.19), Cm(10)]
|
||||
for col_idx, width in enumerate(widths):
|
||||
for cell in table.columns[col_idx].cells:
|
||||
cell_width_element = cell._element.xpath(".//w:tcPr")[0]
|
||||
tcW = OxmlElement("w:tcW")
|
||||
tcW.set(qn("w:w"), str(int(width.cm * 567))) # Convert cm to twips
|
||||
tcW.set(qn("w:type"), "dxa")
|
||||
cell_width_element.append(tcW)
|
||||
|
||||
# Adjust row heights
|
||||
for row in table.rows:
|
||||
trPr = row._tr.get_or_add_trPr() # Get or add the <w:trPr> element
|
||||
trHeight = OxmlElement("w:trHeight")
|
||||
trHeight.set(
|
||||
qn("w:val"), str(int(Pt(15).pt * 20))
|
||||
) # Convert points to twips
|
||||
trHeight.set(qn("w:hRule"), "exact") # Use "exact" for fixed height
|
||||
trPr.append(trHeight)
|
||||
|
||||
# Fill the table with sorted data
|
||||
for row_idx, (number, name) in enumerate(self.apparats):
|
||||
row = table.rows[row_idx]
|
||||
|
||||
# Set font for the first column (number)
|
||||
cell_number_paragraph = row.cells[0].paragraphs[0]
|
||||
cell_number_run = cell_number_paragraph.add_run(str(number))
|
||||
cell_number_run.font.name = self.table_font_bold
|
||||
cell_number_run.font.size = self.table_font_size
|
||||
cell_number_run.font.bold = True
|
||||
cell_number_run.font.color.rgb = self.color_red
|
||||
cell_number_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
||||
|
||||
# Set font for the second column (name)
|
||||
cell_name_paragraph = row.cells[1].paragraphs[0]
|
||||
words = name.split()
|
||||
if words:
|
||||
# Add the first word in bold
|
||||
bold_run = cell_name_paragraph.add_run(words[0])
|
||||
bold_run.font.bold = True
|
||||
bold_run.font.name = self.table_font_bold
|
||||
bold_run.font.size = self.table_font_size
|
||||
|
||||
# Add the rest of the words normally
|
||||
if len(words) > 1:
|
||||
normal_run = cell_name_paragraph.add_run(" " + " ".join(words[1:]))
|
||||
normal_run.font.name = self.table_font_normal
|
||||
normal_run.font.size = self.table_font_size
|
||||
cell_name_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
|
||||
|
||||
self.set_table_border(table)
|
||||
|
||||
def make_document(self):
|
||||
# Create a new Document
|
||||
section = self.doc.sections[0]
|
||||
section.top_margin = Cm(2.54) # Default 1 inch (can adjust as needed)
|
||||
section.bottom_margin = Cm(1.5) # Set bottom margin to 1.5 cm
|
||||
section.left_margin = Cm(2.54) # Default 1 inch
|
||||
section.right_margin = Cm(2.54) # Default 1 inch
|
||||
|
||||
# Add the current date
|
||||
current_date = datetime.now().strftime("%Y-%m-%d")
|
||||
date_paragraph = self.doc.add_paragraph(current_date)
|
||||
date_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
|
||||
|
||||
# Add a header
|
||||
semester = f"Semesterapparate {self.semester}"
|
||||
header = self.doc.add_paragraph(semester)
|
||||
header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
||||
header_run = header.runs[0]
|
||||
header_run.font.name = self.header_font
|
||||
header_run.font.size = self.header_font_size
|
||||
header_run.font.bold = True
|
||||
header_run.font.color.rgb = self.color_blue
|
||||
|
||||
sub_header = self.doc.add_paragraph("(Alphabetisch)")
|
||||
sub_header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
||||
sub_header_run = sub_header.runs[0]
|
||||
sub_header_run.font.name = self.header_font
|
||||
sub_header_run.font.size = self.sub_header_font_size
|
||||
sub_header_run.font.color.rgb = self.color_red
|
||||
|
||||
self.doc.add_paragraph("")
|
||||
|
||||
self.create_sorted_table()
|
||||
|
||||
def save_document(self, name: str) -> None:
    """Write the in-memory document to the path *name*."""
    document = self.doc
    document.save(name)
|
||||
|
||||
def create_pdf(self) -> None:
    """Save the document as .docx and convert it to PDF through MS Word.

    The .docx is written first, so a failing save never starts Word.
    Word is then automated over COM; the document is closed and the
    application quit even when opening or exporting raises, so no
    orphaned Word process is left behind.

    Raises:
        Whatever ``save_document`` or the COM calls raise; nothing is
        swallowed here.
    """
    import comtypes.client

    docx_name = self.filename + ".docx"
    self.save_document(docx_name)
    docx_path = os.path.abspath(docx_name)
    # Build the target with os.path.join instead of a hard-coded "/".
    pdf_path = os.path.join(os.getcwd(), f"{self.filename}.pdf")

    word = comtypes.client.CreateObject("Word.Application")  # type: ignore
    try:
        doc = word.Documents.Open(docx_path)
        try:
            # 17 is Word's wdFormatPDF save format.
            doc.SaveAs(pdf_path, FileFormat=17)
        finally:
            doc.Close()
    finally:
        word.Quit()
    log.debug("PDF saved")
|
||||
|
||||
@property
def cleanup(self) -> None:
    """Delete the generated ``<filename>.docx`` and ``<filename>.pdf``.

    Each file is handled independently and a missing file is ignored,
    so the call is safe when only one (or neither) of them exists.
    Kept as a property because existing callers trigger it by attribute
    access.
    """
    for suffix in (".docx", ".pdf"):
        try:
            os.remove(f"{self.filename}{suffix}")
        except FileNotFoundError:
            # Nothing to clean up for this suffix.
            pass
|
||||
|
||||
@property
def send(self) -> None:
    """Hand ``<filename>.pdf`` to ``print_document`` and log the hand-off.

    Implemented as a property, so merely accessing ``obj.send`` triggers
    the print job.
    """
    pdf_name = self.filename + ".pdf"
    print_document(pdf_name)
    log.debug("Document sent to printer")
|
||||
|
||||
|
||||
class SemapSchilder:
    """Build a sheet of sign labels ("Schilder") and export it as PDF.

    Instantiating the class does all the work: the page is configured,
    one label plus a dashed cutting guide is written per entry, stale
    output files are removed, and ``Schilder.docx`` / ``Schilder.pdf``
    are produced in the current working directory.
    """

    def __init__(self, entries: list[str]):
        """Create the document for *entries* and immediately render the PDF."""
        self.entries = entries
        self.filename = "Schilder"
        self.font_size = Pt(23)
        self.font_name = font
        self.doc = Document()
        self.define_doc_properties()
        self.add_entries()
        self.cleanup()
        self.create_pdf()

    def define_doc_properties(self):
        """Set page margins and format any paragraphs already in the document."""
        # Margins: 1 cm top, 0 cm bottom, 0.5 cm left and right.
        section = self.doc.sections[0]
        section.top_margin = Cm(1)
        section.bottom_margin = Cm(0)
        section.left_margin = Cm(0.5)
        section.right_margin = Cm(0.5)

        # Apply the label font (bold, black, centred) to existing content.
        for paragraph in self.doc.paragraphs:
            for run in paragraph.runs:
                self._style_run(run)
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    def _style_run(self, run) -> None:
        """Apply the label font: configured name and size, bold, black."""
        run.font.name = self.font_name
        run.font.size = self.font_size
        run.font.bold = True
        run.font.color.rgb = RGBColor(0, 0, 0)

    @staticmethod
    def _apply_label_layout(paragraph) -> None:
        """Centre *paragraph* with fixed 23pt line spacing and 2pt padding."""
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        paragraph.paragraph_format.line_spacing = Pt(23)  # fixed line height
        paragraph.paragraph_format.space_before = Pt(2)
        paragraph.paragraph_format.space_after = Pt(2)

    def add_entries(self):
        """Add one formatted label and one dashed cutting guide per entry."""
        for entry in self.entries:
            paragraph = self.doc.add_paragraph(entry)
            self._apply_label_layout(paragraph)
            self._style_run(paragraph.runs[0])

            # Dashed line used as a guide for cutting the labels apart.
            line = self.doc.add_paragraph()
            self._apply_label_layout(line)
            line.add_run("--------------------------")

    def save_document(self):
        """Write the document to ``<filename>.docx``."""
        self.doc.save(f"{self.filename}.docx")
        log.debug(f"Document saved as {self.filename}.docx")

    def create_pdf(self) -> None:
        """Save the .docx and convert it to PDF through MS Word (COM).

        The document is closed and Word is quit even if opening or
        exporting fails, so no orphaned Word process is left running.
        """
        import comtypes.client

        self.save_document()
        docx_path = os.path.abspath(f"{self.filename}.docx")
        pdf_path = os.path.join(os.getcwd(), f"{self.filename}.pdf")

        word = comtypes.client.CreateObject("Word.Application")  # type: ignore
        try:
            doc = word.Documents.Open(docx_path)
            try:
                # 17 is Word's wdFormatPDF save format.
                doc.SaveAs(pdf_path, FileFormat=17)
            finally:
                doc.Close()
        finally:
            word.Quit()
        log.debug("PDF saved")

    def cleanup(self) -> None:
        """Remove previously generated .docx / .pdf files if they exist."""
        for suffix in (".docx", ".pdf"):
            path = f"{self.filename}{suffix}"
            if os.path.exists(path):
                os.remove(path)

    @property
    def send(self) -> None:
        """Hand ``<filename>.pdf`` to ``print_document`` (runs on attribute access)."""
        print_document(self.filename + ".pdf")
        log.debug("Document sent to printer")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: build the sign sheet for a fixed sample list.
    # Each entry is "<lecturer> (<course title>)"; constructing
    # SemapSchilder runs its whole pipeline from __init__.
    entries = [
        "Lüsebrink (Theorie und Praxis der Leichtathletik)",
        "Kulovics (ISP-Betreuung)",
        "Köhler (Ausgewählte Aspekte der materiellen Kultur Textil)",
        "Grau (Young Adult Literature)",
        "Schiebel (Bewegung II:Ausgewählte Problemfelder)",
        "Schiebel (Ernährungswiss. Perspektive)",
        "Park (Kommunikation und Kooperation)",
        "Schiebel (Schwimmen)",
        "Huppertz (Philosophieren mit Kindern)",
        "Heyl (Heyl)",
        "Reuter (Verschiedene Veranstaltungen)",
        "Reinhold (Arithmetik und mathematisches Denken)",
        "Wirtz (Forschungsmethoden)",
        "Schleider (Essstörungen)",
        "Schleider (Klinische Psychologie)",
        "Schleider (Doktorandenkolloquium)",
        "Schleider (Störungen Sozialverhaltens/Delinquenz)",
        "Burth (EU Forschung im Int. Vergleich/EU Gegenstand biling. Didaktik)",
        "Reinhardt (Einführung Politikdidaktik)",
        "Schleider (Psychologische Interventionsmethoden)",
        "Schleider (ADHS)",
        "Schleider (Beratung und Teamarbeit)",
        "Schleider (LRS)",
        "Schleider (Gesundheitspsychologie)",
        "Schleider (Elterntraining)",
        "Wulff (Hochschulzertifikat DaZ)",
        "Dinkelaker ( )",
        "Droll (Einführung in die Sprachwissenschaft)",
        "Karoß (Gymnastik - Sich Bewegen mit und ohne Handgeräte)",
        "Sahrai (Kindheit und Gesellschaft)",
    ]
    doc = SemapSchilder(entries)
|
||||
631
src/logic/SRU.py
Normal file
631
src/logic/SRU.py
Normal file
@@ -0,0 +1,631 @@
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
# centralized logging used via src.shared.logging
|
||||
from src.logic.dataclass import BookData
|
||||
from src.shared.logging import log
|
||||
|
||||
log # ensure imported logger is referenced
|
||||
|
||||
|
||||
# -----------------------
|
||||
# Dataclasses
|
||||
# -----------------------
|
||||
|
||||
|
||||
# --- MARC XML structures ---
|
||||
@dataclass
class ControlField:
    """A MARC control field: its ``tag`` attribute and its text ``value``."""

    tag: str  # value of the element's "tag" attribute
    value: str  # element text ("" when the element has no text)
|
||||
|
||||
|
||||
@dataclass
class SubField:
    """A MARC subfield: its ``code`` attribute and its text ``value``."""

    code: str  # value of the element's "code" attribute
    value: str  # element text ("" when the element has no text)
|
||||
|
||||
|
||||
@dataclass
class DataField:
    """A MARC data field: tag, two indicators and an ordered list of subfields."""

    tag: str
    ind1: str = " "  # first indicator; a single space when absent
    ind2: str = " "  # second indicator; a single space when absent
    subfields: List[SubField] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class MarcRecord:
    """A parsed MARC record: leader plus its control fields and data fields."""

    leader: str
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- SRU record wrapper ---
|
||||
@dataclass
class Record:
    """One record entry of an SRU response, wrapping the parsed MARC record."""

    recordSchema: str
    recordPacking: str
    recordData: MarcRecord  # the MARC record found inside recordData
    recordPosition: int
|
||||
|
||||
|
||||
@dataclass
class EchoedSearchRequest:
    """The request parameters echoed back by the server in an SRU response."""

    version: str
    query: str
    maximumRecords: int
    recordPacking: str
    recordSchema: str
|
||||
|
||||
|
||||
@dataclass
class SearchRetrieveResponse:
    """Top-level parsed SRU searchRetrieve response."""

    version: str
    numberOfRecords: int
    records: List[Record] = field(default_factory=list)
    # None when the response carries no echoed request element.
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
|
||||
|
||||
|
||||
# -----------------------
|
||||
# Parser
|
||||
# -----------------------
|
||||
|
||||
# XML namespace URIs used by the parser, and the prefix map passed to
# ElementTree's find()/findall() so paths can be written as "zs:..." / "marc:...".
ZS = "http://www.loc.gov/zing/srw/"
MARC = "http://www.loc.gov/MARC21/slim"
NS = {"zs": ZS, "marc": MARC}
|
||||
|
||||
|
||||
def _text(elem: Optional[ET.Element]) -> str:
    """Return the element's text, or "" if the element or its text is missing."""
    if elem is None:
        return ""
    content = elem.text
    return content if content else ""
|
||||
|
||||
|
||||
def _req_text(parent: ET.Element, path: str) -> Optional[str]:
    """Return the text of the first child matching *path*, else None.

    *path* is resolved with the module's ``NS`` prefix map. None is
    returned both when no element matches and when the match has no text.
    """
    node = parent.find(path, NS)
    return None if node is None else node.text
|
||||
|
||||
|
||||
def parse_marc_record(record_el: ET.Element) -> MarcRecord:
    """Convert a ``<marc:record>`` element into a ``MarcRecord``.

    A missing leader becomes "", tags are stripped of surrounding
    whitespace, and absent or empty indicators default to a single space.
    """
    control: List[ControlField] = [
        ControlField(tag=node.get("tag", "").strip(), value=_text(node))
        for node in record_el.findall("marc:controlfield", NS)
    ]

    data: List[DataField] = []
    for node in record_el.findall("marc:datafield", NS):
        parts: List[SubField] = [
            SubField(code=sub.get("code", ""), value=_text(sub))
            for sub in node.findall("marc:subfield", NS)
        ]
        data.append(
            DataField(
                tag=node.get("tag", "").strip(),
                ind1=node.get("ind1") or " ",
                ind2=node.get("ind2") or " ",
                subfields=parts,
            )
        )

    return MarcRecord(
        leader=_req_text(record_el, "marc:leader") or "",
        controlfields=control,
        datafields=data,
    )
|
||||
|
||||
|
||||
def parse_record(zs_record_el: ET.Element) -> Record:
    """Convert one ``<zs:record>`` element into a ``Record``.

    Raises:
        ValueError: if ``zs:recordData`` or the MARC record inside it is
            missing, or if ``zs:recordPosition`` is not an integer.
    """
    schema = _req_text(zs_record_el, "zs:recordSchema") or ""
    packing = _req_text(zs_record_el, "zs:recordPacking") or ""

    payload = zs_record_el.find("zs:recordData", NS)
    if payload is None:
        raise ValueError("Missing zs:recordData")

    # The namespace-qualified lookup matches the MARC record whether the
    # document declares the MARC namespace as default or with a prefix.
    marc_el = payload.find("marc:record", NS)
    if marc_el is None:
        raise ValueError("Missing MARC21 record inside zs:recordData")

    marc = parse_marc_record(marc_el)
    position_text = _req_text(zs_record_el, "zs:recordPosition") or "0"

    return Record(
        recordSchema=schema,
        recordPacking=packing,
        recordData=marc,
        recordPosition=int(position_text),
    )
|
||||
|
||||
|
||||
def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
    """Parse the optional ``zs:echoedSearchRetrieveRequest`` child of *root*.

    Returns None when the element is absent. Missing children yield ""
    and a missing or non-numeric ``maximumRecords`` yields 0.
    """
    echoed = root.find("zs:echoedSearchRetrieveRequest", NS)
    if echoed is None:
        return None

    def child_text(name: str) -> str:
        # Permissive lookup: "" when the child is missing.
        return _text(echoed.find(f"zs:{name}", NS))

    version = child_text("version")
    query = child_text("query")
    max_text = child_text("maximumRecords") or "0"
    packing = child_text("recordPacking")
    schema = child_text("recordSchema")

    try:
        max_records = int(max_text)
    except ValueError:
        max_records = 0

    return EchoedSearchRequest(
        version=version,
        query=query,
        maximumRecords=max_records,
        recordPacking=packing,
        recordSchema=schema,
    )
|
||||
|
||||
|
||||
def parse_search_retrieve_response(
    xml_str: Union[str, bytes],
) -> SearchRetrieveResponse:
    """Parse a complete SRU searchRetrieve response document.

    Args:
        xml_str: The raw XML, as text or bytes.

    Returns:
        The parsed response. ``version`` is "" when the document has no
        ``zs:version`` element, so it always matches the declared ``str``
        field type; ``numberOfRecords`` defaults to 0.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is not well-formed.
        ValueError: if ``zs:numberOfRecords`` is not an integer, or a
            record lacks its MARC payload.
    """
    # NOTE(review): xml.etree is not hardened against maliciously crafted
    # XML; this assumes the input comes from the configured SRU endpoints.
    root = ET.fromstring(xml_str)

    # The root element is zs:searchRetrieveResponse.
    version = _req_text(root, "zs:version") or ""
    numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")

    records_parent = root.find("zs:records", NS)
    records: List[Record] = []
    if records_parent is not None:
        records = [parse_record(r) for r in records_parent.findall("zs:record", NS)]

    return SearchRetrieveResponse(
        version=version,
        numberOfRecords=numberOfRecords,
        records=records,
        echoedSearchRetrieveRequest=parse_echoed_request(root),
    )
|
||||
|
||||
|
||||
# --- Query helpers over MarcRecord ---
|
||||
|
||||
|
||||
def iter_datafields(
    rec: MarcRecord,
    tag: Optional[str] = None,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> Iterable[DataField]:
    """Yield the record's datafields, optionally filtered by tag/indicators.

    A filter left as None matches every field.
    """
    wanted = (("tag", tag), ("ind1", ind1), ("ind2", ind2))
    for candidate in rec.datafields:
        if all(
            expected is None or getattr(candidate, attr) == expected
            for attr, expected in wanted
        ):
            yield candidate
|
||||
|
||||
|
||||
def subfield_values(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[str]:
    """Collect every value of subfield `code` across all matching `tag` fields."""
    return [
        sub.value
        for datafield in iter_datafields(rec, tag, ind1, ind2)
        for sub in datafield.subfields
        if sub.code == code
    ]
|
||||
|
||||
|
||||
def first_subfield_value(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """Return the first value of subfield `code` in `tag`, or `default`."""
    hits = (
        sub.value
        for datafield in iter_datafields(rec, tag, ind1, ind2)
        for sub in datafield.subfields
        if sub.code == code
    )
    return next(hits, default)
|
||||
|
||||
|
||||
def find_datafields_with_subfields(
    rec: MarcRecord,
    tag: str,
    *,
    where_all: Optional[Dict[str, str]] = None,
    where_any: Optional[Dict[str, str]] = None,
    casefold: bool = False,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[DataField]:
    """
    Select datafields of `tag` by their subfield contents.

    - where_all: each (code -> exact value) pair has to occur in the field
    - where_any: when given, at least one (code -> exact value) pair has to occur
    Pass `casefold=True` to compare values case-insensitively.
    """
    required = where_all or {}
    alternatives = where_any or {}

    def norm(text: str) -> str:
        return text.casefold() if casefold else text

    hits: List[DataField] = []
    for datafield in iter_datafields(rec, tag, ind1, ind2):
        # Every (code, normalized value) pair present in this field.
        present = {(sub.code, norm(sub.value)) for sub in datafield.subfields}

        if not all((c, norm(v)) in present for c, v in required.items()):
            continue
        if alternatives and not any(
            (c, norm(v)) in present for c, v in alternatives.items()
        ):
            continue
        hits.append(datafield)

    return hits
|
||||
|
||||
|
||||
def controlfield_value(
    rec: MarcRecord, tag: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the value of the first controlfield with `tag` (e.g. '001'), or `default`."""
    return next((cf.value for cf in rec.controlfields if cf.tag == tag), default)
|
||||
|
||||
|
||||
def datafields_value(
    data: List[DataField], code: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the first value of subfield `code` found in `data`, or `default`."""
    matches = (
        sub.value for datafield in data for sub in datafield.subfields if sub.code == code
    )
    return next(matches, default)
|
||||
|
||||
|
||||
def datafield_value(
    df: DataField, code: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the first value of subfield `code` within `df`, or `default`."""
    return next((sub.value for sub in df.subfields if sub.code == code), default)
|
||||
|
||||
|
||||
def _smart_join_title(a: str, b: Optional[str]) -> str:
    """
    Combine 245 $a and $b into one title string.

    Without $b the stripped $a is returned. Otherwise the parts are joined
    with ' : ', or with a single space when $a already ends with, or $b
    already starts with, one of ':', ';', '/'.
    """
    main = a.strip()
    if not b:
        return main
    remainder = b.strip()
    marks = (":", ";", "/")
    separator = " " if main.endswith(marks) or remainder.startswith(marks) else " : "
    return f"{main}{separator}{remainder}"
|
||||
|
||||
|
||||
def subfield_values_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[str]:
    """Collect the values of every subfield `code` over the given DataFields."""
    collected: List[str] = []
    for datafield in fields:
        collected.extend(sub.value for sub in datafield.subfields if sub.code == code)
    return collected
|
||||
|
||||
|
||||
def first_subfield_value_from_fields(
    fields: Iterable[DataField],
    code: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """Return the first subfield value with `code` over the given DataFields, or `default`."""
    matches = (
        sub.value for datafield in fields for sub in datafield.subfields if sub.code == code
    )
    return next(matches, default)
|
||||
|
||||
|
||||
def subfield_value_pairs_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[Tuple[DataField, str]]:
    """
    Pair every subfield value with `code` with the DataField it belongs to.

    Handy when the origin field of a value matters.
    """
    return [
        (datafield, sub.value)
        for datafield in fields
        for sub in datafield.subfields
        if sub.code == code
    ]
|
||||
|
||||
|
||||
def book_from_marc(rec: MarcRecord) -> BookData:
    """Build a ``BookData`` from the MARC fields of *rec*.

    Mapping: 001 -> ppn; 245 $a/$b -> title; 924 $g of the field whose
    $9 is "Frei 129" -> signature; 264 $c -> year (ind2 "1" preferred);
    020 $a -> isbn list; 338 $a -> media type; 041 $a -> language list;
    700 $a joined with "; " -> author; 250 $a, 300 $a and 264 $b ->
    edition, pages and publisher ("" when absent).
    """

    def first(tag: str, code: str, **filters) -> Optional[str]:
        return first_subfield_value(rec, tag, code, **filters)

    # Title: 245 $a, extended by $b when present.
    main_title = first("245", "a")
    sub_title = first("245", "b")
    title = _smart_join_title(main_title, sub_title) if main_title else None

    # Signature: $g of the 924 field whose $9 equals "Frei 129".
    local_holdings = find_datafields_with_subfields(
        rec, "924", where_all={"9": "Frei 129"}
    )

    # Year: 264 $c, preferring ind2 == "1", falling back to any 264.
    year = first("264", "c", ind2="1") or first("264", "c")

    contributors = subfield_values(rec, "700", "a")

    return BookData(
        ppn=controlfield_value(rec, "001"),
        title=title,
        signature=first_subfield_value_from_fields(local_holdings, "g"),
        edition=first("250", "a") or "",
        year=year,
        pages=first("300", "a") or "",
        publisher=first("264", "b") or "",
        isbn=subfield_values(rec, "020", "a"),
        language=subfield_values(rec, "041", "a"),
        link="",
        author="; ".join(contributors) if contributors else None,
        media_type=first("338", "a"),
    )
|
||||
|
||||
|
||||
class SWBData(Enum):
    """Connection settings for the SWB SRU endpoint."""

    # URL template; "{}" is the slot for the query string.
    URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"
    # Index prefix for query arguments.
    ARGSCHEMA = "pica."
    NAME = "SWB"
|
||||
|
||||
|
||||
class DNBData(Enum):
    """Connection settings for the DNB SRU endpoint."""

    # URL template; "{}" is the slot for the query string.
    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"
    # No index prefix for this site.
    ARGSCHEMA = ""
    NAME = "DNB"
|
||||
|
||||
|
||||
class SRUSite(Enum):
    """Known SRU sites; each member's value is that site's settings enum."""

    SWB = SWBData
    DNB = DNBData
|
||||
|
||||
|
||||
# Regex character class (one character) for RVK notation matching.
RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK notations
|
||||
|
||||
|
||||
def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    Pick the single newest edition from `dnb_result` relative to `swb_result`.

    A candidate counts as newer when:
    - its year is greater than swb's year, OR
    - its edition_number is greater than swb's edition_number
    (a comparison only applies when both sides are not None).

    Guards and preferences:
    - Candidates whose signature differs from swb's (both present, compared
      after normalization) are dropped as a different work.
    - Among candidates sharing a ppn, the one whose signature matches swb
      wins, then the one that has any signature.
    - Of the survivors only the maximum by (year, edition_number,
      signature-match, has-signature) is returned, wrapped in a list.

    Returns None when no candidate qualifies.
    """

    def norm_sig(raw: Optional[str]) -> str:
        # Lowercase, collapse whitespace, keep alnum plus a few separators.
        if not raw:
            return ""
        collapsed = re.sub(r"\s+", " ", raw.lower()).strip()
        return re.sub(r"[^a-z0-9\-_/\. ]+", "", collapsed)

    def has_sig(book: BookData) -> bool:
        return bool(getattr(book, "signature", None))

    def sig_matches_swb(book: BookData) -> bool:
        if has_sig(book) and has_sig(swb_result):
            return norm_sig(book.signature) == norm_sig(swb_result.signature)
        return False

    def strictly_newer(book: BookData) -> bool:
        later_year = (
            book.year is not None
            and swb_result.year is not None
            and book.year > swb_result.year
        )
        higher_edition = (
            book.edition_number is not None
            and swb_result.edition_number is not None
            and book.edition_number > swb_result.edition_number
        )
        return later_year or higher_edition

    def preference(book: BookData) -> tuple[int, int]:
        # (signature matches swb, has signature)
        return (int(sig_matches_swb(book)), int(has_sig(book)))

    def recency(book: BookData):
        year = -1 if book.year is None else book.year
        edition = -1 if book.edition_number is None else book.edition_number
        return (year, edition, int(sig_matches_swb(book)), int(has_sig(book)))

    reference_sig = norm_sig(getattr(swb_result, "signature", None))

    # 1) Keep only same-work candidates that are newer.
    newer: List[BookData] = []
    for book in dnb_result:
        own_sig = getattr(book, "signature", None)
        different_work = (
            own_sig and swb_result.signature and norm_sig(own_sig) != reference_sig
        )
        if different_work:
            continue
        if strictly_newer(book):
            newer.append(book)

    if not newer:
        return None

    # 2) One entry per PPN, preferring a (matching) signature.
    best_by_ppn: dict[Optional[str], BookData] = {}
    for book in newer:
        ppn = getattr(book, "ppn", None)
        incumbent = best_by_ppn.get(ppn)
        if incumbent is None or preference(book) > preference(incumbent):
            best_by_ppn[ppn] = book

    unique = list(best_by_ppn.values())
    if not unique:
        return None

    # 3) Only the latest one survives.
    latest = max(unique, key=recency)
    return [latest] if latest else None
|
||||
|
||||
|
||||
class Api:
    """Small SRU client bound to one site.

    Holds a persistent ``requests.Session`` so TCP connections are
    reused across queries.
    """

    def __init__(self, site: str, url: str, prefix: str):
        """Store the site name, URL template and index prefix; open a session.

        Args:
            site: Site name; used in the User-Agent, in error messages and
                to enable DNB-specific argument filtering.
            url: URL template with one ``{}`` slot for the query string.
            prefix: Index prefix of the site (stored; not applied here).
        """
        self.site = site
        self.url = url
        self.prefix = prefix
        # Reuse TCP connections across requests for better performance
        self._session = requests.Session()
        # Slightly larger connection pool for concurrent calls
        adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

    def close(self):
        """Close the HTTP session; never raises."""
        # Deliberately best-effort: this also runs from __del__, where the
        # session may be missing (failed __init__) or already torn down.
        try:
            self._session.close()
        except Exception:
            pass

    def __del__(self):
        # Best-effort cleanup
        self.close()

    def get(self, query_args: Iterable[str]) -> List[Record]:
        """Run one searchRetrieve request and return the parsed records.

        Args:
            query_args: Search terms such as ``"pica.tit=Foo"``; they are
                combined with ``+and+``. For the DNB site every term
                starting with ``pica.`` is dropped first.

        Raises:
            ValueError: if no term is left for a DNB query.
            Exception: if the server answers with a status other than 200.
        """
        terms = list(query_args)
        if self.site == "DNB":
            terms = [arg for arg in terms if not arg.startswith("pica.")]
            if not terms:
                raise ValueError("DNB queries must include at least one search term")

        query = "+and+".join(terms)
        # Minimal escaping: only spaces and ampersands are encoded.
        query = query.replace(" ", "%20").replace("&", "%26")
        url = self.url.format(query)

        log.debug(url)
        headers = {
            "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
            "Accept": "application/xml",
            "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
        }
        # Use persistent session and set timeouts to avoid hanging
        resp = self._session.get(url, headers=headers, timeout=(3.05, 60))
        if resp.status_code != 200:
            # Name the site actually queried instead of always saying "SWB".
            raise Exception(
                f"Error fetching data from {self.site}: {resp.status_code}"
            )
        # Parse the raw bytes so the XML parser handles the encoding itself.
        return parse_search_retrieve_response(resp.content).records

    def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
        """Query the site and convert every returned record to ``BookData``.

        When the arguments contain a ``pica.tit=<title>`` term, only books
        whose title starts with that title (case-insensitive) are kept.
        """
        # Materialize once: the arguments are read by get() and again below,
        # which would silently fail for a one-shot iterator.
        terms = list(query_args)
        records: List[Record] = self.get(terms)
        log.debug(f"{self.site} found {len(records)} records for args={terms}")

        # Split on the first "=" only, so titles containing "=" stay intact.
        title = next(
            (arg.split("=", 1)[1] for arg in terms if arg.startswith("pica.tit=")),
            None,
        )

        books: List[BookData] = [book_from_marc(rec.recordData) for rec in records]
        if title:
            wanted = title.lower()
            books = [b for b in books if b.title and b.title.lower().startswith(wanted)]
        return books

    def getLinkForBook(self, book: BookData) -> str:
        """Return a catalogue link for *book*; not implemented, always ""."""
        # Not implemented: depends on catalog front-end; return empty string for now
        return ""
|
||||
|
||||
|
||||
class SWB(Api):
    """``Api`` client preconfigured with the ``SWBData`` settings."""

    def __init__(self):
        site, url, prefix = (
            SWBData.NAME.value,
            SWBData.URL.value,
            SWBData.ARGSCHEMA.value,
        )
        # The base initializer stores site/url/prefix and opens the session.
        super().__init__(site, url, prefix)
|
||||
@@ -1,6 +1,35 @@
|
||||
from .dataclass import ApparatData, BookData, Prof, Apparat, ELSA
|
||||
__all__ = [
|
||||
"custom_sort",
|
||||
"sort_semesters_list",
|
||||
"APP_NRS",
|
||||
"PROF_TITLES",
|
||||
"SEMAP_MEDIA_ACCOUNTS",
|
||||
"csv_to_list",
|
||||
"ELSA",
|
||||
"Apparat",
|
||||
"ApparatData",
|
||||
"BookData",
|
||||
"Prof",
|
||||
"Semester",
|
||||
"SemapDocument",
|
||||
"elsa_word_to_csv",
|
||||
"pdf_to_semap",
|
||||
"word_docx_to_csv",
|
||||
"word_to_semap",
|
||||
"ZoteroController",
|
||||
"eml_to_semap",
|
||||
]
|
||||
from .c_sort import custom_sort, sort_semesters_list
|
||||
from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
|
||||
from .csvparser import csv_to_list
|
||||
from .wordparser import elsa_word_to_csv, word_docx_to_csv, word_to_semap, SemapDocument
|
||||
from .dataclass import ELSA, Apparat, ApparatData, BookData, Prof
|
||||
from .semester import Semester
|
||||
from .wordparser import (
|
||||
SemapDocument,
|
||||
elsa_word_to_csv,
|
||||
pdf_to_semap,
|
||||
word_docx_to_csv,
|
||||
word_to_semap,
|
||||
)
|
||||
from .xmlparser import eml_to_semap
|
||||
from .zotero import ZoteroController
|
||||
|
||||
@@ -1,36 +1,4 @@
|
||||
def parse_semester(semester: str):
    """
    Turn a semester label into a sortable key.

    Yields (year, type): type 0 for "SoSe <year>", type 1 for
    "WiSe <start>/<end>" (the start year is used).
    Raises ValueError for any other prefix.
    """
    is_summer = semester.startswith("SoSe")
    if not is_summer and not semester.startswith("WiSe"):
        raise ValueError(f"Invalid semester format: {semester}")

    year_token = semester.split()[1]
    if is_summer:
        return int(year_token), 0

    first_year, _second_year = (int(part) for part in year_token.split("/"))
    return first_year, 1
|
||||
|
||||
|
||||
def custom_sort(entries):
    """
    Return `entries` ordered by semester, then second, then third element.

    :param entries: List of tuples in the format (str, int, int).
    :return: New sorted list of tuples.
    """

    def ordering(entry):
        # Semester parsed as (year, type) first, then the remaining elements.
        return (parse_semester(entry[0]), entry[1], entry[2])

    return sorted(entries, key=ordering)
|
||||
|
||||
|
||||
def parse_semester(semester: str):
|
||||
def parse_semester(semester: str) -> tuple[int, int]:
|
||||
"""
|
||||
Parses the semester string into a sortable format.
|
||||
Returns a tuple of (year, type), where type is 0 for SoSe and 1 for WiSe.
|
||||
@@ -48,6 +16,23 @@ def parse_semester(semester: str):
|
||||
raise ValueError(f"Invalid semester format: {semester}")
|
||||
|
||||
|
||||
def custom_sort(entries) -> list:
    """
    Return `entries` ordered by semester, then second, then third element.

    :param entries: List of tuples in the format (str, int, int).
    :return: New sorted list of tuples.
    """

    def ordering(entry):
        # Semester parsed as (year, type) first, then the remaining elements.
        return (parse_semester(entry[0]), entry[1], entry[2])

    return sorted(entries, key=ordering)
|
||||
|
||||
|
||||
def sort_semesters_list(semesters: list) -> list:
|
||||
"""
|
||||
Sorts a list of semester strings based on year and type.
|
||||
@@ -83,4 +68,4 @@ if __name__ == "__main__":
|
||||
"SoSe 25",
|
||||
]
|
||||
|
||||
print(sort_semesters_list(unsorted))
|
||||
# print(sort_semesters_list(unsorted))
|
||||
|
||||
@@ -30,184 +30,184 @@ PROF_TITLES = [
|
||||
]
|
||||
|
||||
SEMAP_MEDIA_ACCOUNTS = {
|
||||
"1": "1008000055",
|
||||
"2": "1008000188",
|
||||
"3": "1008000211",
|
||||
"4": "1008000344",
|
||||
"5": "1008000477",
|
||||
"6": "1008000500",
|
||||
"7": "1008000633",
|
||||
"8": "1008000766",
|
||||
"9": "1008000899",
|
||||
"10": "1008000922",
|
||||
"11": "1008001044",
|
||||
"12": "1008001177",
|
||||
"13": "1008001200",
|
||||
"14": "1008001333",
|
||||
"15": "1008001466",
|
||||
"16": "1008001599",
|
||||
"17": "1008001622",
|
||||
"18": "1008001755",
|
||||
"19": "1008001888",
|
||||
"20": "1008001911",
|
||||
"21": "1008002033",
|
||||
"22": "1008002166",
|
||||
"23": "1008002299",
|
||||
"24": "1008002322",
|
||||
"25": "1008002455",
|
||||
"26": "1008002588",
|
||||
"27": "1008002611",
|
||||
"28": "1008002744",
|
||||
"29": "1008002877",
|
||||
"30": "1008002900",
|
||||
"31": "1008003022",
|
||||
"32": "1008003155",
|
||||
"33": "1008003288",
|
||||
"34": "1008003311",
|
||||
"35": "1008003444",
|
||||
"36": "1008003577",
|
||||
"37": "1008003600",
|
||||
"38": "1008003733",
|
||||
"39": "1008003866",
|
||||
"40": "1008003999",
|
||||
"41": "1008004011",
|
||||
"42": "1008004144",
|
||||
"43": "1008004277",
|
||||
"44": "1008004300",
|
||||
"45": "1008004433",
|
||||
"46": "1008004566",
|
||||
"47": "1008004699",
|
||||
"48": "1008004722",
|
||||
"49": "1008004855",
|
||||
"50": "1008004988",
|
||||
"51": "1008005000",
|
||||
"52": "1008005133",
|
||||
"53": "1008005266",
|
||||
"54": "1008005399",
|
||||
"55": "1008005422",
|
||||
"56": "1008005555",
|
||||
"57": "1008005688",
|
||||
"58": "1008005711",
|
||||
"59": "1008005844",
|
||||
"60": "1008005977",
|
||||
"61": "1008006099",
|
||||
"62": "1008006122",
|
||||
"63": "1008006255",
|
||||
"64": "1008006388",
|
||||
"65": "1008006411",
|
||||
"66": "1008006544",
|
||||
"67": "1008006677",
|
||||
"68": "1008006700",
|
||||
"69": "1008006833",
|
||||
"70": "1008006966",
|
||||
"71": "1008007088",
|
||||
"72": "1008007111",
|
||||
"73": "1008007244",
|
||||
"74": "1008007377",
|
||||
"75": "1008007400",
|
||||
"76": "1008007533",
|
||||
"77": "1008007666",
|
||||
"78": "1008007799",
|
||||
"79": "1008007822",
|
||||
"80": "1008007955",
|
||||
"81": "1008008077",
|
||||
"82": "1008008100",
|
||||
"83": "1008008233",
|
||||
"84": "1008008366",
|
||||
"85": "1008008499",
|
||||
"86": "1008008522",
|
||||
"87": "1008008655",
|
||||
"88": "1008008788",
|
||||
"89": "1008008811",
|
||||
"90": "1008008944",
|
||||
"91": "1008009066",
|
||||
"92": "1008009199",
|
||||
"93": "1008009222",
|
||||
"94": "1008009355",
|
||||
"95": "1008009488",
|
||||
"96": "1008009511",
|
||||
"97": "1008009644",
|
||||
"98": "1008009777",
|
||||
"99": "1008009800",
|
||||
"100": "1008009933",
|
||||
"101": "1008010022",
|
||||
"102": "1008010155",
|
||||
"103": "1008010288",
|
||||
"104": "1008010311",
|
||||
"105": "1008010444",
|
||||
"106": "1008010577",
|
||||
"107": "1008010600",
|
||||
"108": "1008010733",
|
||||
"109": "1008010866",
|
||||
"110": "1008010999",
|
||||
"111": "1008011011",
|
||||
"112": "1008011144",
|
||||
"113": "1008011277",
|
||||
"114": "1008011300",
|
||||
"115": "1008011433",
|
||||
"116": "1008011566",
|
||||
"117": "1008011699",
|
||||
"118": "1008011722",
|
||||
"119": "1008011855",
|
||||
"120": "1008011988",
|
||||
"121": "1008012000",
|
||||
"122": "1008012133",
|
||||
"123": "1008012266",
|
||||
"124": "1008012399",
|
||||
"125": "1008012422",
|
||||
"126": "1008012555",
|
||||
"127": "1008012688",
|
||||
"128": "1008012711",
|
||||
"129": "1008012844",
|
||||
"130": "1008012977",
|
||||
"131": "1008013099",
|
||||
"132": "1008013122",
|
||||
"133": "1008013255",
|
||||
"134": "1008013388",
|
||||
"135": "1008013411",
|
||||
"136": "1008013544",
|
||||
"137": "1008013677",
|
||||
"138": "1008013700",
|
||||
"139": "1008013833",
|
||||
"140": "1008013966",
|
||||
"141": "1008014088",
|
||||
"142": "1008014111",
|
||||
"143": "1008014244",
|
||||
"144": "1008014377",
|
||||
"145": "1008014400",
|
||||
"146": "1008014533",
|
||||
"147": "1008014666",
|
||||
"148": "1008014799",
|
||||
"149": "1008014822",
|
||||
"150": "1008014955",
|
||||
"151": "1008015077",
|
||||
"152": "1008015100",
|
||||
"153": "1008015233",
|
||||
"154": "1008015366",
|
||||
"155": "1008015499",
|
||||
"156": "1008015522",
|
||||
"157": "1008015655",
|
||||
"158": "1008015788",
|
||||
"159": "1008015811",
|
||||
"160": "1008015944",
|
||||
"161": "1008016066",
|
||||
"162": "1008016199",
|
||||
"163": "1008016222",
|
||||
"164": "1008016355",
|
||||
"165": "1008016488",
|
||||
"166": "1008016511",
|
||||
"167": "1008016644",
|
||||
"168": "1008016777",
|
||||
"169": "1008016800",
|
||||
"170": "1008016933",
|
||||
"171": "1008017055",
|
||||
"172": "1008017188",
|
||||
"173": "1008017211",
|
||||
"174": "1008017344",
|
||||
"175": "1008017477",
|
||||
"176": "1008017500",
|
||||
"177": "1008017633",
|
||||
"178": "1008017766",
|
||||
"179": "1008017899",
|
||||
"180": "1008017922",
|
||||
1: "1008000055",
|
||||
2: "1008000188",
|
||||
3: "1008000211",
|
||||
4: "1008000344",
|
||||
5: "1008000477",
|
||||
6: "1008000500",
|
||||
7: "1008000633",
|
||||
8: "1008000766",
|
||||
9: "1008000899",
|
||||
10: "1008000922",
|
||||
11: "1008001044",
|
||||
12: "1008001177",
|
||||
13: "1008001200",
|
||||
14: "1008001333",
|
||||
15: "1008001466",
|
||||
16: "1008001599",
|
||||
17: "1008001622",
|
||||
18: "1008001755",
|
||||
19: "1008001888",
|
||||
20: "1008001911",
|
||||
21: "1008002033",
|
||||
22: "1008002166",
|
||||
23: "1008002299",
|
||||
24: "1008002322",
|
||||
25: "1008002455",
|
||||
26: "1008002588",
|
||||
27: "1008002611",
|
||||
28: "1008002744",
|
||||
29: "1008002877",
|
||||
30: "1008002900",
|
||||
31: "1008003022",
|
||||
32: "1008003155",
|
||||
33: "1008003288",
|
||||
34: "1008003311",
|
||||
35: "1008003444",
|
||||
36: "1008003577",
|
||||
37: "1008003600",
|
||||
38: "1008003733",
|
||||
39: "1008003866",
|
||||
40: "1008003999",
|
||||
41: "1008004011",
|
||||
42: "1008004144",
|
||||
43: "1008004277",
|
||||
44: "1008004300",
|
||||
45: "1008004433",
|
||||
46: "1008004566",
|
||||
47: "1008004699",
|
||||
48: "1008004722",
|
||||
49: "1008004855",
|
||||
50: "1008004988",
|
||||
51: "1008005000",
|
||||
52: "1008005133",
|
||||
53: "1008005266",
|
||||
54: "1008005399",
|
||||
55: "1008005422",
|
||||
56: "1008005555",
|
||||
57: "1008005688",
|
||||
58: "1008005711",
|
||||
59: "1008005844",
|
||||
60: "1008005977",
|
||||
61: "1008006099",
|
||||
62: "1008006122",
|
||||
63: "1008006255",
|
||||
64: "1008006388",
|
||||
65: "1008006411",
|
||||
66: "1008006544",
|
||||
67: "1008006677",
|
||||
68: "1008006700",
|
||||
69: "1008006833",
|
||||
70: "1008006966",
|
||||
71: "1008007088",
|
||||
72: "1008007111",
|
||||
73: "1008007244",
|
||||
74: "1008007377",
|
||||
75: "1008007400",
|
||||
76: "1008007533",
|
||||
77: "1008007666",
|
||||
78: "1008007799",
|
||||
79: "1008007822",
|
||||
80: "1008007955",
|
||||
81: "1008008077",
|
||||
82: "1008008100",
|
||||
83: "1008008233",
|
||||
84: "1008008366",
|
||||
85: "1008008499",
|
||||
86: "1008008522",
|
||||
87: "1008008655",
|
||||
88: "1008008788",
|
||||
89: "1008008811",
|
||||
90: "1008008944",
|
||||
91: "1008009066",
|
||||
92: "1008009199",
|
||||
93: "1008009222",
|
||||
94: "1008009355",
|
||||
95: "1008009488",
|
||||
96: "1008009511",
|
||||
97: "1008009644",
|
||||
98: "1008009777",
|
||||
99: "1008009800",
|
||||
100: "1008009933",
|
||||
101: "1008010022",
|
||||
102: "1008010155",
|
||||
103: "1008010288",
|
||||
104: "1008010311",
|
||||
105: "1008010444",
|
||||
106: "1008010577",
|
||||
107: "1008010600",
|
||||
108: "1008010733",
|
||||
109: "1008010866",
|
||||
110: "1008010999",
|
||||
111: "1008011011",
|
||||
112: "1008011144",
|
||||
113: "1008011277",
|
||||
114: "1008011300",
|
||||
115: "1008011433",
|
||||
116: "1008011566",
|
||||
117: "1008011699",
|
||||
118: "1008011722",
|
||||
119: "1008011855",
|
||||
120: "1008011988",
|
||||
121: "1008012000",
|
||||
122: "1008012133",
|
||||
123: "1008012266",
|
||||
124: "1008012399",
|
||||
125: "1008012422",
|
||||
126: "1008012555",
|
||||
127: "1008012688",
|
||||
128: "1008012711",
|
||||
129: "1008012844",
|
||||
130: "1008012977",
|
||||
131: "1008013099",
|
||||
132: "1008013122",
|
||||
133: "1008013255",
|
||||
134: "1008013388",
|
||||
135: "1008013411",
|
||||
136: "1008013544",
|
||||
137: "1008013677",
|
||||
138: "1008013700",
|
||||
139: "1008013833",
|
||||
140: "1008013966",
|
||||
141: "1008014088",
|
||||
142: "1008014111",
|
||||
143: "1008014244",
|
||||
144: "1008014377",
|
||||
145: "1008014400",
|
||||
146: "1008014533",
|
||||
147: "1008014666",
|
||||
148: "1008014799",
|
||||
149: "1008014822",
|
||||
150: "1008014955",
|
||||
151: "1008015077",
|
||||
152: "1008015100",
|
||||
153: "1008015233",
|
||||
154: "1008015366",
|
||||
155: "1008015499",
|
||||
156: "1008015522",
|
||||
157: "1008015655",
|
||||
158: "1008015788",
|
||||
159: "1008015811",
|
||||
160: "1008015944",
|
||||
161: "1008016066",
|
||||
162: "1008016199",
|
||||
163: "1008016222",
|
||||
164: "1008016355",
|
||||
165: "1008016488",
|
||||
166: "1008016511",
|
||||
167: "1008016644",
|
||||
168: "1008016777",
|
||||
169: "1008016800",
|
||||
170: "1008016933",
|
||||
171: "1008017055",
|
||||
172: "1008017188",
|
||||
173: "1008017211",
|
||||
174: "1008017344",
|
||||
175: "1008017477",
|
||||
176: "1008017500",
|
||||
177: "1008017633",
|
||||
178: "1008017766",
|
||||
179: "1008017899",
|
||||
180: "1008017922",
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import csv
|
||||
|
||||
import chardet
|
||||
from charset_normalizer import detect
|
||||
|
||||
|
||||
def csv_to_list(path: str) -> list[str]:
|
||||
"""
|
||||
Extracts the data from a csv file and returns it as a pandas dataframe
|
||||
"""
|
||||
encoding = chardet.detect(open(path, "rb").read())["encoding"]
|
||||
encoding = detect(open(path, "rb").read())["encoding"]
|
||||
with open(path, newline="", encoding=encoding) as csvfile:
|
||||
# if decoder fails to map, assign ""
|
||||
reader = csv.reader(csvfile, delimiter=";", quotechar="|")
|
||||
@@ -20,4 +20,4 @@ def csv_to_list(path: str) -> list[str]:
|
||||
if __name__ == "__main__":
|
||||
text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
|
||||
# remove linebreaks
|
||||
# print(text)
|
||||
# #print(text)
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from enum import Enum
|
||||
import json
|
||||
from typing import Union, Any, Optional
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import regex
|
||||
|
||||
from src.logic.openai import name_tester, run_shortener, semester_converter
|
||||
from src.logic.semester import Semester
|
||||
|
||||
|
||||
@dataclass
|
||||
class Prof:
|
||||
@@ -32,7 +37,7 @@ class Prof:
|
||||
self._title = value
|
||||
|
||||
# add function that sets the data from a tuple
|
||||
def from_tuple(self, data: tuple[Union[str, int], ...]):
|
||||
def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof":
|
||||
setattr(self, "id", data[0])
|
||||
setattr(self, "_title", data[1])
|
||||
setattr(self, "firstname", data[2])
|
||||
@@ -67,21 +72,64 @@ class BookData:
|
||||
language: Union[str, list[str], None] = field(default_factory=list)
|
||||
publisher: str | None = None
|
||||
place: str | None = None
|
||||
year: str | None = None
|
||||
year: int | None = None
|
||||
pages: str | None = None
|
||||
library_location: int | None = None
|
||||
library_location: str | None = None
|
||||
in_apparat: bool | None = False
|
||||
adis_idn: str | None = None
|
||||
old_book: Any | None = None
|
||||
media_type: str | None = None #
|
||||
in_library: bool | None = None # whether the book is in the library or not
|
||||
medianr: int | None = None # Media number in the library system
|
||||
|
||||
def __post_init__(self):
    """Normalize field values right after dataclass initialisation.

    - ``library_location`` becomes its string form, or ``None`` when falsy.
    - A non-empty ``language`` list is stripped of blank entries and joined
      with commas into one string.
    - ``year`` is reduced to its digits (stored as a string), or ``None``
      when falsy.
    - ``in_library`` mirrors whether ``signature`` is truthy.
    """
    location = self.library_location
    self.library_location = str(location) if location else None

    languages = self.language
    if isinstance(languages, list) and languages:
        cleaned = [entry.strip() for entry in languages if entry.strip()]
        self.language = ",".join(cleaned)

    if self.year:
        # Strip everything that is not a digit, e.g. brackets or a copyright sign.
        self.year = regex.sub(r"[^\d]", "", str(self.year))
    else:
        self.year = None

    self.in_library = bool(self.signature)
|
||||
|
||||
def from_dict(self, data: dict) -> "BookData":
    """Copy every key/value pair of ``data`` onto this instance.

    :param data: Mapping of attribute name to value; keys are not validated.
    :return: This instance, so calls can be chained.
    """
    for attribute in data:
        setattr(self, attribute, data[attribute])
    return self
|
||||
|
||||
def merge(self, other: "BookData") -> "BookData":
    """Fill gaps in this record with values taken from ``other``.

    For every attribute of ``other``:

    - list values are unioned into this record's value (order preserved,
      duplicates skipped); a scalar already stored here is wrapped in a
      list first, while ``None`` and ``""`` count as "nothing stored";
    - any other value is copied only when this record holds ``None`` or
      ``""`` for that attribute.

    Afterwards, if ``language`` is a list, entries longer than four
    characters are dropped.

    :param other: Record to merge from; it is not modified.
    :return: This instance.
    """
    for key, value in other.__dict__.items():
        # Default to None so attributes that only exist on ``other`` merge
        # cleanly instead of raising AttributeError.
        current = getattr(self, key, None)
        if isinstance(value, list):
            if current is None or current == "":
                # Start a fresh list; never alias ``other``'s list and never
                # carry an empty-string placeholder into the result.
                merged = []
            elif isinstance(current, list):
                merged = current
            else:
                merged = [current]
            for item in value:
                if item not in merged:
                    merged.append(item)
            setattr(self, key, merged)
        elif value is not None and (current is None or current == ""):
            setattr(self, key, value)

    # Keep only short language entries (at most four characters).
    if isinstance(self.language, list):
        self.language = [lang for lang in self.language if len(lang) <= 4]
    return self
|
||||
|
||||
@property
def to_dict(self) -> str:
    """Serialize the populated attributes to a JSON string.

    Despite the name, the result is a JSON *string*, not a dict. Attributes
    whose value is ``None`` are left out, and ``old_book`` is always
    excluded. Non-ASCII characters are written unescaped.
    """
    data_dict = {
        key: value
        for key, value in self.__dict__.items()
        if value is not None and key != "old_book"
    }
    return json.dumps(data_dict, ensure_ascii=False)
|
||||
|
||||
def from_dataclass(self, dataclass: Optional[Any]) -> None:
|
||||
if dataclass is None:
|
||||
@@ -89,10 +137,44 @@ class BookData:
|
||||
for key, value in dataclass.__dict__.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def get_book_type(self) -> str:
    """Classify the record as e-book or print edition.

    :return: ``"eBook"`` when ``pages`` contains the word ``"Online"``,
        otherwise ``"Druckausgabe"``. A missing ``pages`` value (``None``,
        the field's default) is treated as a print edition instead of
        raising ``TypeError``.
    """
    if self.pages and "Online" in self.pages:
        return "eBook"
    return "Druckausgabe"
|
||||
|
||||
def from_string(self, data: str) -> "BookData":
    """Build a new ``BookData`` from a JSON object string.

    The decoded object's keys are passed as keyword arguments to the
    ``BookData`` constructor. The instance this method is called on is
    neither read nor modified.

    :param data: JSON text encoding an object.
    :return: A freshly constructed ``BookData``.
    """
    return BookData(**json.loads(data))
|
||||
|
||||
def from_LehmannsSearchResult(self, result: Any) -> "BookData":
    """Populate this record from a Lehmanns search result.

    :param result: Object exposing ``title``, ``authors``, ``edition``,
        ``url``, ``isbn13``, ``pages``, ``publisher`` and ``year``.
    :return: This instance.
    """
    # ``isbn`` is always stored as a list: pass a list through, wrap a single
    # truthy value, and fall back to an empty list.
    raw_isbn = result.isbn13
    if isinstance(raw_isbn, list):
        isbn_list = raw_isbn
    elif raw_isbn:
        isbn_list = [raw_isbn]
    else:
        isbn_list = []

    self.title = result.title
    self.author = "; ".join(result.authors) if result.authors else None
    self.edition = str(result.edition) if result.edition else None
    self.link = result.url
    self.isbn = isbn_list
    self.pages = str(result.pages) if result.pages else None
    self.publisher = result.publisher
    self.year = str(result.year) if result.year else None
    return self
|
||||
|
||||
@property
def edition_number(self) -> Optional[int]:
    """Numeric edition parsed from the ``edition`` text.

    :return: The first run of digits in ``edition`` as an int, or ``0`` when
        ``edition`` is ``None`` or contains no digits.
    """
    if self.edition is not None:
        found = regex.search(r"(\d+)", self.edition)
        if found:
            return int(found.group(1))
    return 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class MailData:
|
||||
@@ -141,6 +223,7 @@ class Subjects(Enum):
|
||||
for i in cls:
|
||||
if i.name == name:
|
||||
return i.id - 1
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -204,3 +287,124 @@ class ELSA:
|
||||
class ApparatData:
|
||||
prof: Prof = field(default_factory=Prof)
|
||||
apparat: Apparat = field(default_factory=Apparat)
|
||||
|
||||
|
||||
@dataclass
class XMLMailSubmission:
    """Fields of one submission; every field is optional and defaults to ``None``."""

    # Submitter
    name: str | None = None
    lastname: str | None = None
    title: str | None = None
    telno: int | None = None
    email: str | None = None
    # Requested apparatus
    app_name: str | None = None
    subject: str | None = None
    semester: Semester | None = None
    books: list[BookData] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Book:
|
||||
author: str = None
|
||||
year: str = None
|
||||
edition: str = None
|
||||
title: str = None
|
||||
location: str = None
|
||||
publisher: str = None
|
||||
signature: str = None
|
||||
internal_notes: str = None
|
||||
|
||||
@property
|
||||
def has_signature(self) -> bool:
|
||||
return self.signature is not None and self.signature != ""
|
||||
|
||||
@property
|
||||
def is_empty(self) -> bool:
|
||||
return all(
|
||||
[
|
||||
self.author == "",
|
||||
self.year == "",
|
||||
self.edition == "",
|
||||
self.title == "",
|
||||
self.location == "",
|
||||
self.publisher == "",
|
||||
self.signature == "",
|
||||
self.internal_notes == "",
|
||||
]
|
||||
)
|
||||
|
||||
def from_dict(self, data: dict[str, Any]):
|
||||
for key, value in data.items():
|
||||
value = value.strip()
|
||||
if value == "\u2002\u2002\u2002\u2002\u2002":
|
||||
value = ""
|
||||
|
||||
if key == "Autorenname(n):Nachname, Vorname":
|
||||
self.author = value
|
||||
elif key == "Jahr/Auflage":
|
||||
self.year = value.split("/")[0] if "/" in value else value
|
||||
self.edition = value.split("/")[1] if "/" in value else ""
|
||||
elif key == "Titel":
|
||||
self.title = value
|
||||
elif key == "Ort und Verlag":
|
||||
self.location = value.split(",")[0] if "," in value else value
|
||||
self.publisher = value.split(",")[1] if "," in value else ""
|
||||
elif key == "Standnummer":
|
||||
self.signature = value.strip()
|
||||
elif key == "Interne Vermerke":
|
||||
self.internal_notes = value
|
||||
|
||||
|
||||
@dataclass
class SemapDocument:
    """Container for the fields of one parsed request document.

    Holds the subject, contact data, title, semester and requested books,
    plus helper properties that normalise the person name, the title length
    and the semester.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm the branch structure of ``nameSetter`` and
    ``renameSemester`` against the real file.
    """

    subject: str = None
    phoneNumber: int = None
    mail: str = None
    title: str = None
    title_suggestions: list[str] = None
    semester: Union[str, Semester] = None
    books: list[Book] = None
    eternal: bool = False
    personName: str = None
    personTitle: str = None
    # No annotation: @dataclass treats these two as plain class attributes,
    # not as init fields. Instances shadow them in ``nameSetter``.
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        # Always start with a fresh list; any ``title_suggestions`` value
        # passed to the constructor is discarded here.
        self.title_suggestions = []

    @property
    def nameSetter(self):
        """Split ``personTitle`` into name parts and compute title suggestions.

        Despite being a property, this mutates the instance and returns
        ``None``. It sends ``personTitle`` to ``name_tester`` and stores
        ``"<last_name>, <first_name>"`` in ``personName``; ``personTitle`` is
        replaced by the returned title when that is not ``None``. It then
        sets ``title_length`` to ``len(title) + 3 + len(last name)``. When
        that exceeds 40, ``title_max_length`` becomes ``38 - len(last name)``
        and the ``"shortened_string"`` of each ``run_shortener`` result is
        appended to ``title_suggestions``; otherwise ``title_suggestions`` is
        reset to an empty list.
        """
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        if data["title"] is not None:
            title = data["title"]
            self.personTitle = title
        self.personName = name
        # personName is "<last>, <first>", so split(",")[0] is the last name.
        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
        if self.title_length > 40:
            name_len = len(self.personName.split(",")[0])
            self.title_max_length = 38 - name_len
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []
            pass

    @property
    def renameSemester(self) -> None:
        """Convert ``semester`` into a ``Semester`` object in place.

        Does nothing when ``semester`` is falsy. A value containing
        ``", Dauer"`` is cut at the first comma, marks the document as
        ``eternal`` and is parsed directly; any other value is first passed
        through ``semester_converter``.
        """
        if self.semester:
            if ", Dauer" in self.semester:
                self.semester = self.semester.split(",")[0]
                self.eternal = True
                self.semester = Semester().from_string(self.semester)
            else:
                self.semester = Semester().from_string(
                    semester_converter(self.semester)
                )

    @property
    def signatures(self) -> list[str]:
        """Signatures of all books that have one; empty list when ``books`` is ``None``."""
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []
|
||||
|
||||
312
src/logic/lehmannsapi.py
Normal file
312
src/logic/lehmannsapi.py
Normal file
@@ -0,0 +1,312 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Iterable, List, Optional
|
||||
from urllib.parse import quote_plus, urljoin
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.logic.dataclass import BookData
|
||||
|
||||
BASE = "https://www.lehmanns.de"
|
||||
SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="
|
||||
|
||||
|
||||
@dataclass
|
||||
class LehmannsSearchResult:
|
||||
title: str
|
||||
url: str
|
||||
|
||||
# Core fields from the listing card
|
||||
year: Optional[int] = None
|
||||
edition: Optional[int] = None
|
||||
publisher: Optional[str] = None
|
||||
isbn13: Optional[str] = None
|
||||
|
||||
# Extras from the listing card
|
||||
description: Optional[str] = None
|
||||
authors: list[str] = field(default_factory=list)
|
||||
media_type: Optional[str] = None
|
||||
book_format: Optional[str] = None
|
||||
price_eur: Optional[float] = None
|
||||
currency: str = "EUR"
|
||||
image: Optional[str] = None
|
||||
|
||||
# From detail page:
|
||||
pages: Optional[str] = None # "<N> Seiten"
|
||||
buyable: bool = True # set in enrich_pages (detail page)
|
||||
unavailable_hint: Optional[str] = (
|
||||
None # e.g. "Titel ist leider vergriffen; keine Neuauflage"
|
||||
)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
class LehmannsClient:
    """Scrapes quick-search results, then enriches (and filters) via product pages.

    Usable as a context manager; leaving the ``with`` block closes the
    underlying ``httpx.Client``.
    """

    def __init__(self, timeout: float = 20.0):
        """Create the HTTP client with browser-like headers.

        :param timeout: Timeout passed to ``httpx.Client``.
        """
        self.client = httpx.Client(
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
                ),
                "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
            timeout=timeout,
            follow_redirects=True,
        )

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Returns None, so exceptions raised inside the with-block propagate.
        self.close()

    # ------------------- Search (listing) -------------------

    def build_search_url(self, title: str) -> str:
        """Return the quick-search URL for ``title`` (URL-quoted, spaces as '+')."""
        # spaces -> '+'
        return SEARCH_URL + quote_plus(title)

    def search_by_title(
        self,
        title: str,
        limit: Optional[int] = None,
        strict: bool = False,
        only_latest: bool = True,
    ) -> List[BookData]:
        """
        Search by title and return the hits converted to ``BookData``.

        The listing page is parsed, every hit is enriched in place through
        ``enrich_pages`` (detail-page fetch), and each hit is converted with
        ``BookData.from_LehmannsSearchResult``. The list returned by
        ``enrich_pages`` is not used, so hits it would drop as unbuyable are
        still part of the result.

        :param title: Title to search for.
        :param limit: Maximum number of results; negative values act like 0.
            Applied before ``only_latest`` and ignored when ``strict`` is set.
        :param strict: Keep only case-insensitive exact title matches and
            return them immediately, skipping ``limit`` and ``only_latest``.
        :param only_latest: When more than one result remains, keep only the
            one with the highest ``edition_number``.
        :return: Matching books; an empty list when the listing page could
            not be fetched.
        """
        url = self.build_search_url(title=title)
        html = self._get(url)
        if not html:
            return []
        results = self._parse_results(html)
        # Enriches the result objects in place; the filtered return value is discarded.
        self.enrich_pages(results)

        results = [BookData().from_LehmannsSearchResult(r) for r in results]
        if strict:
            # filter results to only those with exact title match (case-insensitive)
            title_lower = title.lower()
            results = [r for r in results if r.title and r.title.lower() == title_lower]
            # results = [r for r in results if r.buyable]
            # NOTE(review): nesting reconstructed from de-indented source; this
            # return is assumed to belong to the strict branch — confirm.
            return results
        if limit is not None:
            results = results[: max(0, limit)]
        if only_latest and len(results) > 1:
            # keep only the latest edition (highest edition number)
            results.sort(key=lambda r: (r.edition_number or 0), reverse=True)
            results = [results[0]]
        return results

    # ------------------- Detail enrichment & filtering -------------------

    def enrich_pages(
        self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
    ) -> List[LehmannsSearchResult]:
        """
        Fetch each result.url, extract:
        - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
        - availability: from <li class="availability-3">...</li>
        * if it contains "Titel ist leider vergriffen", mark buyable=False
        * if it also contains "keine Neuauflage", set unavailable_hint accordingly
        If drop_unbuyable=True, exclude non-buyable results from the returned list.

        The result objects are modified in place. With drop_unbuyable=True a
        result whose detail page could not be fetched or parsed is also left
        out of the returned list.
        """
        enriched: List[LehmannsSearchResult] = []
        for r in results:
            try:
                html = self._get(r.url)
                if not html:
                    # Can't verify; keep as-is when not dropping, else skip
                    if not drop_unbuyable:
                        enriched.append(r)
                    continue

                soup = BeautifulSoup(html, "html.parser")  # type: ignore

                # Pages
                pages_node = soup.select_one(  # type: ignore
                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
                    ".meta-seiten [itemprop='numberOfPages'], "
                    ".meta-seiten[itemprop='numberOfPages'], "
                    ".book-meta.meta-seiten"
                )
                if pages_node:
                    text = pages_node.get_text(" ", strip=True)
                    # First run of digits in the node text is taken as the page count.
                    m = re.search(r"\d+", text)
                    if m:
                        r.pages = f"{m.group(0)} Seiten"

                # Availability via li.availability-3
                avail_li = soup.select_one("li.availability-3")  # type: ignore
                if avail_li:
                    # Collapse whitespace and lowercase before substring matching.
                    avail_text = " ".join(
                        avail_li.get_text(" ", strip=True).split()
                    ).lower()
                    if "titel ist leider vergriffen" in avail_text:
                        r.buyable = False
                        if "keine neuauflage" in avail_text:
                            r.unavailable_hint = (
                                "Titel ist leider vergriffen; keine Neuauflage"
                            )
                        else:
                            r.unavailable_hint = "Titel ist leider vergriffen"

                # Append or drop
                if (not drop_unbuyable) or r.buyable:
                    enriched.append(r)

            except Exception:
                # On any per-item error, keep the record if not dropping; else skip
                if not drop_unbuyable:
                    enriched.append(r)
                continue

        return enriched

    # ------------------- Internals -------------------

    def _get(self, url: str) -> Optional[str]:
        """Return the page body, or ``None`` on any failure.

        The body is returned only for status 200 with a ``text/html`` content
        type. ``httpx.HTTPError`` is swallowed and reported as ``None``.
        """
        try:
            r = self.client.get(url)
            # Decode the body as UTF-8 regardless of the charset the server declares.
            r.encoding = "utf-8"
            if r.status_code == 200 and "text/html" in (
                r.headers.get("content-type") or ""
            ):
                return r.text
        except httpx.HTTPError:
            pass
        return None

    def _parse_results(self, html: str) -> List[LehmannsSearchResult]:
        """Parse a listing page into one ``LehmannsSearchResult`` per ``div.info-block``.

        Blocks without a title link are skipped; every other field is
        best-effort and stays ``None`` (or an empty list for authors) when
        its markup is missing.
        """
        soup = BeautifulSoup(html, "html.parser")
        results: list[LehmannsSearchResult] = []

        for block in soup.select("div.info-block"):
            a = block.select_one(".title a[href]")
            if not a:
                continue
            url = urljoin(BASE, a["href"].strip())
            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
                strip=True
            )

            # Alternative headline => extend title
            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
            title = (
                f"{base_title} : {alternative_headline}"
                if alternative_headline
                else base_title
            )
            description = alternative_headline

            # Authors from .author
            authors: list[str] = []
            author_div = block.select_one("div.author")  # type: ignore
            if author_div:
                t = author_div.get_text(" ", strip=True)
                # Drop a leading "von " and split on ";", "&" or " und ".
                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
                for part in re.split(r"\s*;\s*|\s*&\s*|\s+und\s+", t):
                    name = " ".join(part.split())
                    if name:
                        authors.append(name)

            # Media + format
            media_type = None
            book_format = None
            type_text = block.select_one(".type")  # type: ignore
            if type_text:
                t = type_text.get_text(" ", strip=True)
                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
                if m:
                    media_type = m.group(1)
                # The format is whatever stands in the first parentheses, upper-cased.
                fm = re.search(r"\(([^)]+)\)", t)
                if fm:
                    book_format = fm.group(1).strip().upper()

            # Year
            year = None
            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
            if y:
                try:
                    year = int(y.get_text(strip=True))
                except ValueError:
                    pass

            # Edition
            edition = None
            ed = block.select_one("[itemprop='bookEdition']")  # type: ignore
            if ed:
                m = re.search(r"\d+", ed.get_text(strip=True))
                if m:
                    edition = int(m.group())

            # Publisher
            publisher = None
            pub = block.select_one(  # type: ignore
                ".publisherprop [itemprop='name']"
            ) or block.select_one(".publisher [itemprop='name']")  # type: ignore
            if pub:
                publisher = pub.get_text(strip=True)

            # ISBN-13
            isbn13 = None
            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")  # type: ignore
            if isbn_tag:
                # Remove hyphens/spaces, then look for a 13-digit 978/979 number.
                digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
                m = re.search(r"(97[89]\d{10})", digits)
                if m:
                    isbn13 = m.group(1)

            # Price (best effort)
            price_eur = None
            txt = block.get_text(" ", strip=True)
            mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", txt)
            if not mprice and block.parent:
                # Fall back to the text of the parent element.
                sib = block.parent.get_text(" ", strip=True)
                mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", sib)
            if mprice:
                # "1.234,56" -> "1234.56"
                num = mprice.group(1).replace(".", "").replace(",", ".")
                try:
                    price_eur = float(num)
                except ValueError:
                    pass

            # Image (best-effort)
            image = None
            # Nearest <img> that precedes this block in document order.
            left_img = block.find_previous("img")  # type: ignore
            if left_img and left_img.get("src"):
                image = urljoin(BASE, left_img["src"])

            results.append(
                LehmannsSearchResult(
                    title=title,
                    url=url,
                    description=description,
                    authors=authors,
                    media_type=media_type,
                    book_format=book_format,
                    year=year,
                    edition=edition,
                    publisher=publisher,
                    isbn13=isbn13,
                    price_eur=price_eur,
                    image=image,
                )
            )

        return results
|
||||
@@ -1,10 +1,12 @@
|
||||
from openai import OpenAI
|
||||
from src import settings
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from src import settings
|
||||
|
||||
|
||||
|
||||
def init_client():
|
||||
def init_client() -> OpenAI:
|
||||
"""Initialize the OpenAI client with the API key and model from settings."""
|
||||
global client, model, api_key
|
||||
if not settings.openAI.api_key:
|
||||
@@ -16,9 +18,11 @@ def init_client():
|
||||
api_key = settings.openAI.api_key
|
||||
client = OpenAI(api_key=api_key)
|
||||
return client
|
||||
def run_shortener(title:str, length:int):
|
||||
|
||||
|
||||
def run_shortener(title: str, length: int) -> list[dict[str, Any]]:
|
||||
client = init_client()
|
||||
response = client.responses.create(
|
||||
response = client.responses.create( # type: ignore
|
||||
model=model,
|
||||
instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit.
|
||||
You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit.
|
||||
@@ -27,27 +31,28 @@ based on that, please reply only the shortened string. Give me 5 choices. if the
|
||||
)
|
||||
answers = response.output_text
|
||||
return eval(answers) # type: ignore
|
||||
#answers are strings in json format, so we need to convert them to a list of dicts
|
||||
# answers are strings in json format, so we need to convert them to a list of dicts
|
||||
|
||||
|
||||
def name_tester(name: str) -> dict:
    """Ask the model to split a full name into title, first and last name.

    The name is sent as a JSON object ``{"name": ...}``. It is encoded with
    ``json.dumps`` so quotes or backslashes inside the name cannot produce
    invalid JSON; for plain names the payload is identical to the former
    hand-built string.

    :param name: Full name, optionally including a title.
    :return: The model's reply decoded from JSON; the prompt asks for the
        keys ``"title"``, ``"first_name"`` and ``"last_name"``.
    :raises json.JSONDecodeError: If the reply is not valid JSON.
    """
    client = init_client()
    payload = json.dumps({"name": name}, ensure_ascii=False, separators=(",", ":"))
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a name tester, You are given a name and will have to split the name into first name, last name, and if present the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOt return the answer in a codeblock, use a pure json string. Assume the names are in the usual german naming scheme""",
        input=payload,
    )
    return json.loads(response.output_text)
|
||||
|
||||
def semester_converter(semester: str) -> str:
    """Let the OpenAI model rewrite a free-form semester label.

    The model is instructed to answer with a plain string shaped like
    ``SoSe YY`` or ``WiSe YY/YY+1``.

    Args:
        semester: The semester text exactly as found in the source document.

    Returns:
        The raw text of the model's answer (not validated here).
    """
    api = init_client()
    reply = api.responses.create(  # type: ignore
        input=semester,
        model=model,
        instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""",
    )
    return reply.output_text
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
# add depend path to system path
|
||||
|
||||
import pandas as pd
|
||||
from pdfquery import PDFQuery
|
||||
|
||||
|
||||
def pdf_to_csv(path: str) -> pd.DataFrame:
|
||||
def pdf_to_csv(path: str) -> str:
|
||||
"""
|
||||
Extracts the data from a pdf file and returns it as a pandas dataframe
|
||||
"""
|
||||
@@ -21,4 +20,4 @@ if __name__ == "__main__":
|
||||
text = pdf_to_csv("54_pdf.pdf")
|
||||
# remove linebreaks
|
||||
text = text.replace("\n", "")
|
||||
print(text)
|
||||
# print(text)
|
||||
|
||||
248
src/logic/semester.py
Normal file
248
src/logic/semester.py
Normal file
@@ -0,0 +1,248 @@
|
||||
"""Semester helper class
|
||||
|
||||
A small utility around the *German* academic calendar that distinguishes
|
||||
between *Wintersemester* (WiSe) and *Sommersemester* (SoSe).
|
||||
|
||||
Key points
|
||||
----------
|
||||
* A **`Semester`** is identified by a *term* ("SoSe" or "WiSe") and the last two
|
||||
digits of the calendar year in which the term *starts*.
|
||||
* Formatting **never** pads the year with a leading zero – so ``6`` stays ``6``.
|
||||
* ``offset(n)`` and the static ``generate_missing`` reliably walk the timeline
|
||||
one semester at a time with correct year transitions:
|
||||
|
||||
SoSe 6 → **WiSe 6/7** → SoSe 7 → WiSe 7/8 → …
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
class Semester:
    """Represents a German university semester (WiSe or SoSe).

    A semester is identified by its term (``"SoSe"`` or ``"WiSe"``) and the
    two-digit calendar year in which the term *starts*.  On the timeline a
    summer term precedes the winter term carrying the same year::

        SoSe 6 → WiSe 6/7 → SoSe 7 → WiSe 7/8 → …
    """

    # ------------------------------------------------------------------
    # Class-level fallbacks (evaluated once, at import time).  ``__init__``
    # always sets ``_year`` and ``_month`` per instance, so these only
    # matter to code that reads them from the class itself.
    # ------------------------------------------------------------------
    _year: int | None = int(str(datetime.datetime.now().year)[2:])  # 2024 → 24
    _semester: str | None = None  # "WiSe" or "SoSe" – set later
    _month: int | None = datetime.datetime.now().month
    value: str | None = None  # Human-readable label, e.g. "WiSe 23/24"

    # ------------------------------------------------------------------
    # Construction helpers
    # ------------------------------------------------------------------
    def __init__(
        self,
        year: int | None = None,
        semester: str | None = None,
        month: int | None = None,
    ) -> None:
        """Create a semester; every omitted part is derived from today.

        Args:
            year: Two-digit start year.  Defaults to the current year.
            semester: ``"WiSe"`` or ``"SoSe"``.  Inferred from *month* when
                omitted.
            month: Month (1-12) used for inferring the term.  Defaults to the
                current month.

        Raises:
            ValueError: If *semester* is neither ``"WiSe"`` nor ``"SoSe"``.
        """
        # Read the clock per instance; the class-level defaults are frozen at
        # import time and go stale in a long-running process.
        now = datetime.datetime.now()

        if year is not None:
            self._year = int(year)
        if semester is not None:
            if semester not in ("WiSe", "SoSe"):
                raise ValueError("semester must be 'WiSe' or 'SoSe'")
            self._semester = semester
        self._month = now.month if month is None else month

        if year is None:
            self._year = now.year % 100
            # January–March still belong to the winter term that started in
            # the previous calendar year (Feb 2025 → "WiSe 24/25").  Only
            # adjust when both the year and the term are derived here.
            if semester is None and self._month <= 3:
                self._year = (self._year - 1) % 100

        self.__post_init__()

    def __post_init__(self) -> None:  # noqa: D401 – keep original name
        """Fill in any attribute that is still unset and build ``value``."""
        if self._year is None:
            self._year = int(str(datetime.datetime.now().year)[2:])
        if self._month is None:
            self._month = datetime.datetime.now().month
        if self._semester is None:
            self._generate_semester_from_month()
        self._compute_value()

    # ------------------------------------------------------------------
    # Dunder helpers
    # ------------------------------------------------------------------
    def __str__(self) -> str:  # noqa: D401 – keep original name
        return self.value or "<invalid Semester>"

    def __repr__(self) -> str:  # Helpful for debugging lists
        return f"Semester({self._year!r}, {self._semester!r})"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _generate_semester_from_month(self) -> None:
        """Infer *WiSe* / *SoSe* from the month attribute.

        Months 4-9 map to the summer term, every other month to the winter
        term.
        """
        self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe"

    def _compute_value(self) -> None:
        """Build the label – e.g. ``WiSe 23/24`` or ``SoSe 24`` (no zero padding)."""
        year = self._year
        if self._semester == "WiSe":
            next_year = (year + 1) % 100  # wrap 99 → 0
            self.value = f"WiSe {year}/{next_year}"
        else:  # SoSe
            self.value = f"SoSe {year}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def offset(self, value: int) -> "Semester":
        """Return a new :class:`Semester` *value* steps away.

        Every semester is mapped to a monotonically increasing linear index
        so that simple addition suffices:

        ``index = year * 2 + (0 if SoSe else 1)``.

        Args:
            value: Number of semesters to move; negative values go back.

        Raises:
            TypeError: If *value* is not an ``int``.
            ValueError: If the target would lie before ``SoSe 0``.
        """
        if not isinstance(value, int):
            raise TypeError("value must be an int (number of semesters to jump)")
        if value == 0:
            return Semester(self._year, self._semester)

        current_idx = self._year * 2 + (0 if self._semester == "SoSe" else 1)
        target_idx = current_idx + value
        if target_idx < 0:
            raise ValueError("offset would result in a negative year – not supported")

        new_year, semester_bit = divmod(target_idx, 2)
        new_semester = "SoSe" if semester_bit == 0 else "WiSe"
        return Semester(new_year, new_semester)

    # ------------------------------------------------------------------
    # Comparison helpers
    # ------------------------------------------------------------------
    def isPastSemester(self, current: "Semester") -> bool:
        """Return ``True`` if this semester lies strictly before *current*."""
        log.debug(f"Comparing {self} < {current}")
        if self.year != current.year:
            return self.year < current.year
        # Same start year: the summer term comes first (SoSe 23 → WiSe 23/24).
        return self.semester == "SoSe" and current.semester == "WiSe"

    def isFutureSemester(self, current: "Semester") -> bool:
        """Return ``True`` if this semester lies strictly after *current*."""
        if self.year != current.year:
            return self.year > current.year
        # Same start year: the winter term comes last (SoSe 23 → WiSe 23/24).
        return self.semester == "WiSe" and current.semester == "SoSe"

    def isMatch(self, other: "Semester") -> bool:
        """Return ``True`` if both semesters share year and term."""
        return self.year == other.year and self.semester == other.semester

    # ------------------------------------------------------------------
    # Convenience properties
    # ------------------------------------------------------------------
    @property
    def next(self) -> "Semester":
        """The semester directly after this one."""
        return self.offset(1)

    @property
    def previous(self) -> "Semester":
        """The semester directly before this one."""
        return self.offset(-1)

    @property
    def year(self) -> int:
        """Two-digit year in which the term starts."""
        return self._year

    @property
    def semester(self) -> str:
        """The term, ``"WiSe"`` or ``"SoSe"``."""
        return self._semester

    # ------------------------------------------------------------------
    # Static helpers
    # ------------------------------------------------------------------
    @staticmethod
    def generate_missing(start: "Semester", end: "Semester") -> list[str]:
        """Return the labels of all semesters from *start* to *end* (inclusive).

        Raises:
            TypeError: If either argument is not a :class:`Semester`.
            ValueError: If *start* lies after *end*.
            RuntimeError: If more than 1000 labels would be produced.
        """
        if not isinstance(start, Semester) or not isinstance(end, Semester):
            raise TypeError("start and end must be Semester instances")
        if start.isFutureSemester(end):
            raise ValueError("'start' must not be after 'end'")

        chain: list[str] = [start.value]
        current = start
        while not current.isMatch(end):
            current = current.next
            chain.append(current.value)
            if len(chain) > 1000:  # sanity guard
                raise RuntimeError("generate_missing exceeded sane iteration limit")
        return chain

    # ------------------------------------------------------------------
    # Parsing helper
    # ------------------------------------------------------------------
    @classmethod
    def from_string(cls, s: str) -> "Semester":
        """Parse a human-readable semester label and return a :class:`Semester`.

        Accepted formats (case-insensitive)::

            "SoSe <YY>"          → SoSe of year YY
            "WiSe <YY>/<YY+1>"   → Winter term starting in YY
            "WiSe <YY>"          → Shorthand for the above (next year implied)

        ``YY`` is one or two digits and may contain a leading zero
        ("06" → 6).

        Raises:
            TypeError: If *s* is not a string.
            ValueError: If *s* does not match one of the formats above, a
                summer term carries a second year, or a winter term's second
                year is not the first year + 1 (mod 100).
        """
        if not isinstance(s, str):
            raise TypeError("s must be a string")

        pattern = r"\s*(WiSe|SoSe)\s+(\d{1,2})(?:\s*/\s*(\d{1,2}))?\s*"
        m = re.fullmatch(pattern, s, flags=re.IGNORECASE)
        if not m:
            raise ValueError(
                "invalid semester string format – expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)"
            )

        term_raw, y1_str, y2_str = m.groups()
        # str.capitalize() would yield "Wise"/"Sose" and never match the
        # canonical spellings, so map the lower-cased term explicitly.
        term = {"wise": "WiSe", "sose": "SoSe"}[term_raw.lower()]
        year = int(y1_str)  # "06" → 6, "0" stays 0

        if term == "SoSe":
            if y2_str is not None:
                raise ValueError(
                    "SoSe string should not contain '/' followed by a second year"
                )
            return cls(year, "SoSe")

        # term == "WiSe"
        if y2_str is not None:
            next_year = int(y2_str)
            expected_next = (year + 1) % 100
            if next_year != expected_next:
                raise ValueError("WiSe second year must equal first year + 1 (mod 100)")
        # Accept both explicit "WiSe 6/7" and shorthand "WiSe 6"
        return cls(year, "WiSe")
|
||||
|
||||
|
||||
# ------------------------- quick self‑test -------------------------
|
||||
if __name__ == "__main__":
    # Chain generation demo ------------------------------------------------
    s_start = Semester(6, "SoSe")  # SoSe 6
    s_end = Semester(25, "WiSe")  # WiSe 25/26
    chain = Semester.generate_missing(s_start, s_end)
    print("generate_missing:", chain)

    # Parsing demo ---------------------------------------------------------
    examples = [
        "SoSe 6",
        "WiSe 6/7",
        "WiSe 6",
        "SoSe 23",
        "WiSe 23/24",
        "WiSe 24",
        "WiSe 99/00",
        "SoSe 00",
        "WiSe 100/101",  # three-digit year: from_string only accepts 1-2 digits
    ]
    for ex in examples:
        # Report rejected inputs instead of letting the first ValueError
        # abort the whole demo with a traceback.
        try:
            parsed = Semester.from_string(ex)
        except ValueError as exc:
            print(f"'{ex}' → rejected: {exc}")
            continue
        print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})")
|
||||
@@ -13,7 +13,7 @@ class Settings:
|
||||
default_apps: bool = True
|
||||
custom_applications: list[dict] = field(default_factory=list)
|
||||
|
||||
def save_settings(self):
|
||||
def save_settings(self) -> None:
|
||||
"""Save the settings to the config file."""
|
||||
with open("config.yaml", "w") as f:
|
||||
yaml.dump(self.__dict__, f)
|
||||
|
||||
@@ -1,26 +1,20 @@
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# import sleep_and_retry decorator to retry requests
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
from typing import Union, Any, Optional
|
||||
from src.logic.dataclass import BookData
|
||||
|
||||
from src.logic.dataclass import BookData
|
||||
from src.shared.logging import log
|
||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
import loguru
|
||||
import sys
|
||||
from src import LOG_DIR
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||
|
||||
|
||||
|
||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
@@ -40,6 +34,14 @@ RATE_LIMIT = 20
|
||||
RATE_PERIOD = 30
|
||||
|
||||
|
||||
class TransformerType(Enum):
    """Names of the data formats a transformer can be switched to.

    Each member's value is the same text as its name, so ``member.value``
    gives the plain-string spelling of the mode.
    """

    ARRAY = "ARRAY"
    COinS = "COinS"
    BibTeX = "BibTeX"
    RIS = "RIS"
    RDS = "RDS"
|
||||
|
||||
|
||||
class WebRequest:
|
||||
def __init__(self) -> None:
|
||||
"""Request data from the web, and format it depending on the mode."""
|
||||
@@ -58,14 +60,14 @@ class WebRequest:
|
||||
log.info("Using any book")
|
||||
return self
|
||||
|
||||
def set_apparat(self, apparat: int):
|
||||
def set_apparat(self, apparat: int) -> "WebRequest":
|
||||
self.apparat = apparat
|
||||
if int(self.apparat) < 10:
|
||||
self.apparat = f"0{self.apparat}"
|
||||
log.info(f"Set apparat to {self.apparat}")
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature: str):
|
||||
def get_ppn(self, signature: str) -> "WebRequest":
|
||||
self.signature = signature
|
||||
if "+" in signature:
|
||||
signature = signature.replace("+", "%2B")
|
||||
@@ -80,6 +82,12 @@ class WebRequest:
|
||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search_ppn(self, ppn: str) -> str:
|
||||
response = requests.get(API_URL.format(ppn), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def get_book_links(self, searchterm: str) -> list[str]:
|
||||
response: str = self.search_book(searchterm) # type:ignore
|
||||
soup = BeautifulSoup(response, "html.parser")
|
||||
@@ -91,7 +99,7 @@ class WebRequest:
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search(self, link: str):
|
||||
def search(self, link: str) -> Optional[str]:
|
||||
try:
|
||||
response = requests.get(link, timeout=self.timeout)
|
||||
return response.text
|
||||
@@ -99,7 +107,7 @@ class WebRequest:
|
||||
log.error(f"Request failed: {e}")
|
||||
return None
|
||||
|
||||
def get_data(self) -> Union[list[str], None]:
|
||||
def get_data(self) -> Optional[list[str]]:
|
||||
links = self.get_book_links(self.ppn)
|
||||
log.debug(f"Links: {links}")
|
||||
return_data: list[str] = []
|
||||
@@ -111,21 +119,8 @@ class WebRequest:
|
||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||
if locations:
|
||||
for location in locations:
|
||||
item_location = location.find(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||
if "1. OG Semesterapparat" in location.text:
|
||||
log.success("Found Semesterapparat, adding entry")
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
@@ -137,14 +132,40 @@ class WebRequest:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
else:
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}"
|
||||
)
|
||||
# return_data = []
|
||||
item_location = location.find(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
else:
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}"
|
||||
)
|
||||
# return_data = []
|
||||
|
||||
return return_data
|
||||
|
||||
def get_data_elsa(self):
|
||||
def get_data_elsa(self) -> Optional[list[str]]:
|
||||
links = self.get_book_links(self.ppn)
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
@@ -173,10 +194,16 @@ class BibTextTransformer:
|
||||
ValueError: Raised if mode is not in valid_modes
|
||||
"""
|
||||
|
||||
valid_modes = ["ARRAY", "COinS", "BibTeX", "RIS", "RDS"]
|
||||
valid_modes = [
|
||||
TransformerType.ARRAY,
|
||||
TransformerType.COinS,
|
||||
TransformerType.BibTeX,
|
||||
TransformerType.RIS,
|
||||
TransformerType.RDS,
|
||||
]
|
||||
|
||||
def __init__(self, mode: str = "ARRAY") -> None:
|
||||
self.mode = mode
|
||||
def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
|
||||
self.mode = mode.value
|
||||
self.field = None
|
||||
self.signature = None
|
||||
if mode not in self.valid_modes:
|
||||
@@ -185,12 +212,12 @@ class BibTextTransformer:
|
||||
self.data = None
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
def use_signature(self, signature: str):
|
||||
def use_signature(self, signature: str) -> "BibTextTransformer":
|
||||
"""use the exact signature to search for the book"""
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
def get_data(self, data: Union[list[str]] = None) -> "BibTextTransformer":
|
||||
def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer":
|
||||
RIS_IDENT = "TY -"
|
||||
ARRAY_IDENT = "[kid]"
|
||||
COinS_IDENT = "ctx_ver"
|
||||
|
||||
@@ -1,131 +1,13 @@
|
||||
import sys
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Union
|
||||
from typing import Any, Optional
|
||||
|
||||
import loguru
|
||||
import fitz # PyMuPDF
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
from docx import Document
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.backend import Semester
|
||||
from src.logic.openai import name_tester, run_shortener, semester_converter
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
|
||||
|
||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Book:
|
||||
author: str = None
|
||||
year: str = None
|
||||
edition: str = None
|
||||
title: str = None
|
||||
location: str = None
|
||||
publisher: str = None
|
||||
signature: str = None
|
||||
internal_notes: str = None
|
||||
|
||||
@property
|
||||
def has_signature(self) -> bool:
|
||||
return self.signature is not None and self.signature != ""
|
||||
|
||||
@property
|
||||
def is_empty(self) -> bool:
|
||||
return all(
|
||||
[
|
||||
self.author == "",
|
||||
self.year == "",
|
||||
self.edition == "",
|
||||
self.title == "",
|
||||
self.location == "",
|
||||
self.publisher == "",
|
||||
self.signature == "",
|
||||
self.internal_notes == "",
|
||||
]
|
||||
)
|
||||
|
||||
def from_dict(self, data: dict[str, Any]):
|
||||
for key, value in data.items():
|
||||
value = value.strip()
|
||||
if value == "\u2002\u2002\u2002\u2002\u2002":
|
||||
value = ""
|
||||
|
||||
if key == "Autorenname(n):Nachname, Vorname":
|
||||
self.author = value
|
||||
elif key == "Jahr/Auflage":
|
||||
self.year = value.split("/")[0] if "/" in value else value
|
||||
self.edition = value.split("/")[1] if "/" in value else ""
|
||||
elif key == "Titel":
|
||||
self.title = value
|
||||
elif key == "Ort und Verlag":
|
||||
self.location = value.split(",")[0] if "," in value else value
|
||||
self.publisher = value.split(",")[1] if "," in value else ""
|
||||
elif key == "Standnummer":
|
||||
self.signature = value.strip()
|
||||
elif key == "Interne Vermerke":
|
||||
self.internal_notes = value
|
||||
|
||||
|
||||
@dataclass
|
||||
class SemapDocument:
|
||||
subject: str = None
|
||||
phoneNumber: int = None
|
||||
mail: str = None
|
||||
title: str = None
|
||||
title_suggestions: list[str] = None
|
||||
semester: Union[str, Semester] = None
|
||||
books: list[Book] = None
|
||||
eternal: bool = False
|
||||
personName: str = None
|
||||
personTitle: str = None
|
||||
title_length = 0
|
||||
title_max_length = 0
|
||||
|
||||
def __post_init__(self):
|
||||
self.title_suggestions = []
|
||||
|
||||
@property
|
||||
def nameSetter(self):
|
||||
data = name_tester(self.personTitle)
|
||||
name = f"{data['last_name']}, {data['first_name']}"
|
||||
if data["title"] is not None:
|
||||
title = data["title"]
|
||||
self.personTitle = title
|
||||
self.personName = name
|
||||
self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
|
||||
if self.title_length > 40:
|
||||
log.warning("Title is too long")
|
||||
name_len = len(self.personName.split(",")[0])
|
||||
self.title_max_length = 38 - name_len
|
||||
suggestions = run_shortener(self.title, self.title_max_length)
|
||||
for suggestion in suggestions:
|
||||
self.title_suggestions.append(suggestion["shortened_string"])
|
||||
else:
|
||||
self.title_suggestions = []
|
||||
pass
|
||||
@property
|
||||
def renameSemester(self) -> None:
|
||||
if ", Dauer" in self.semester:
|
||||
self.semester = self.semester.split(",")[0]
|
||||
self.eternal = True
|
||||
self.semester = Semester().from_string(self.semester)
|
||||
else:
|
||||
log.warning("Semester {} is not valid", self.semester)
|
||||
self.semester = Semester().from_string(semester_converter(self.semester))
|
||||
|
||||
@property
|
||||
def signatures(self) -> list[str]:
|
||||
if self.books is not None:
|
||||
return [book.signature for book in self.books if book.has_signature]
|
||||
return []
|
||||
from src.logic.dataclass import Book, SemapDocument
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
@@ -141,8 +23,8 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
|
||||
text = text.replace("\n", "")
|
||||
row_data.append(text)
|
||||
if text == "Ihr Fach:":
|
||||
row_data.append(get_fach(path))
|
||||
# if text == "Ihr Fach:":
|
||||
# row_data.append(get_fach(path))
|
||||
data.append(row_data)
|
||||
df = pd.DataFrame(data)
|
||||
df.columns = df.iloc[0]
|
||||
@@ -153,7 +35,7 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
return m_data
|
||||
|
||||
|
||||
def get_fach(path: str) -> str:
|
||||
def get_fach(path: str) -> Optional[str]:
|
||||
document = zipfile.ZipFile(path)
|
||||
xml_data = document.read("word/document.xml")
|
||||
document.close()
|
||||
@@ -161,17 +43,18 @@ def get_fach(path: str) -> str:
|
||||
soup = BeautifulSoup(xml_data, "xml")
|
||||
# text we need is in <w:p w14:paraId="12456A32" ... > -> w:r -> w:t
|
||||
paragraphs = soup.find_all("w:p")
|
||||
names = []
|
||||
for para in paragraphs:
|
||||
para_id = para.get("w14:paraId")
|
||||
if para_id == "12456A32":
|
||||
# get the data in the w:t
|
||||
for run in para.find_all("w:r"):
|
||||
data = run.find("w:t")
|
||||
return data.contents[0]
|
||||
if data and data.contents:
|
||||
return data.contents[0]
|
||||
return None
|
||||
|
||||
|
||||
def makeDict():
|
||||
def makeDict() -> dict[str, Optional[str]]:
|
||||
return {
|
||||
"work_author": None,
|
||||
"section_author": None,
|
||||
@@ -189,8 +72,8 @@ def makeDict():
|
||||
}
|
||||
|
||||
|
||||
def tuple_to_dict(tlist: tuple, type: str) -> dict:
|
||||
ret = []
|
||||
def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
|
||||
ret: list[dict[str, Optional[str]]] = []
|
||||
for line in tlist:
|
||||
data = makeDict()
|
||||
if type == "Monografien":
|
||||
@@ -230,7 +113,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
|
||||
return ret
|
||||
|
||||
|
||||
def elsa_word_to_csv(path: str):
|
||||
def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
|
||||
doc = Document(path)
|
||||
# # print all lines in doc
|
||||
doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]
|
||||
@@ -265,14 +148,14 @@ def elsa_word_to_csv(path: str):
|
||||
return tuple_to_dict(data, doctype), doctype
|
||||
|
||||
|
||||
def word_to_semap(word_path: str) -> SemapDocument:
|
||||
def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
|
||||
log.info("Parsing Word Document {}", word_path)
|
||||
semap = SemapDocument()
|
||||
df = word_docx_to_csv(word_path)
|
||||
apparatdata = df[0]
|
||||
apparatdata = apparatdata.to_dict()
|
||||
keys = list(apparatdata.keys())
|
||||
print(apparatdata, keys)
|
||||
# print(apparatdata, keys)
|
||||
|
||||
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}
|
||||
semap.phoneNumber = appdata["Telefon:"]
|
||||
@@ -286,8 +169,9 @@ def word_to_semap(word_path: str) -> SemapDocument:
|
||||
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
|
||||
semap.title = appdata["Veranstaltung:"]
|
||||
semap.semester = appdata["Semester:"]
|
||||
semap.renameSemester
|
||||
semap.nameSetter
|
||||
if ai:
|
||||
semap.renameSemester
|
||||
semap.nameSetter
|
||||
|
||||
books = df[2]
|
||||
booklist = []
|
||||
@@ -308,8 +192,182 @@ def word_to_semap(word_path: str) -> SemapDocument:
|
||||
return semap
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
else_df = elsa_word_to_csv(
|
||||
"C:/Users/aky547/Desktop/ELSA_Bestellung Scann Der Westen und der Rest.docx"
|
||||
def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
|
||||
"""
|
||||
Parse a Semesterapparat PDF like the sample you provided and return a SemapDocument.
|
||||
- No external programs, only PyMuPDF.
|
||||
- Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells.
|
||||
- Works across multiple pages; headers only need to exist on the first page.
|
||||
"""
|
||||
doc = fitz.open(pdf_path)
|
||||
semap = SemapDocument()
|
||||
|
||||
# ---------- helpers ----------
|
||||
def _join_tokens(tokens: list[str]) -> str:
|
||||
"""Join tokens, preserving hyphen/URL joins across line wraps."""
|
||||
parts = []
|
||||
for tok in tokens:
|
||||
if parts and (
|
||||
parts[-1].endswith("-")
|
||||
or parts[-1].endswith("/")
|
||||
or parts[-1].endswith(":")
|
||||
):
|
||||
parts[-1] = parts[-1] + tok # no space after '-', '/' or ':'
|
||||
else:
|
||||
parts.append(tok)
|
||||
return " ".join(parts).strip()
|
||||
|
||||
def _extract_row_values_multiline(
|
||||
page, labels: list[str], y_window: float = 24
|
||||
) -> dict[str, str]:
|
||||
"""For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
|
||||
rects = []
|
||||
for lab in labels:
|
||||
hits = page.search_for(lab)
|
||||
if hits:
|
||||
rects.append((lab, hits[0]))
|
||||
if not rects:
|
||||
return {}
|
||||
|
||||
rects.sort(key=lambda t: t[1].x0)
|
||||
words = page.get_text("words")
|
||||
out = {}
|
||||
for i, (lab, r) in enumerate(rects):
|
||||
x0 = r.x1 + 1
|
||||
x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
|
||||
y0 = r.y0 - 3
|
||||
y1 = r.y0 + y_window
|
||||
toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
|
||||
toks.sort(key=lambda w: (w[1], w[0])) # line, then x
|
||||
out[lab] = _join_tokens([w[4] for w in toks])
|
||||
return out
|
||||
|
||||
def _compute_columns_from_headers(page0):
|
||||
"""Find column headers (once) and derive column centers + header baseline."""
|
||||
headers = [
|
||||
("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
|
||||
("Jahr/Auflage", "Jahr/Auflage"),
|
||||
("Titel", "Titel"),
|
||||
("Ort und Verlag", "Ort und Verlag"),
|
||||
("Standnummer", "Standnummer"),
|
||||
("Interne Vermerke", "Interne Vermerke"),
|
||||
]
|
||||
found = []
|
||||
for label, canon in headers:
|
||||
rects = [
|
||||
r for r in page0.search_for(label) if r.y0 > 200
|
||||
] # skip top-of-form duplicates
|
||||
if rects:
|
||||
found.append((canon, rects[0]))
|
||||
found.sort(key=lambda t: t[1].x0)
|
||||
cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
|
||||
header_y = min(r.y0 for _, r in found) if found else 0
|
||||
return cols, header_y
|
||||
|
||||
def _extract_table_rows_from_page(
|
||||
page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
|
||||
):
|
||||
"""
|
||||
Group words into logical rows (tolerant to wrapped lines), then map each word
|
||||
to the nearest column by x-center and join tokens per column.
|
||||
"""
|
||||
words = [
|
||||
w
|
||||
for w in page.get_text("words")
|
||||
if w[1] > header_y + y_top_margin
|
||||
and w[3] < page.rect.height - y_bottom_margin
|
||||
]
|
||||
|
||||
# group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
|
||||
rows = []
|
||||
for w in sorted(words, key=lambda w: w[1]):
|
||||
y = w[1]
|
||||
for row in rows:
|
||||
if abs(row["y_mean"] - y) <= y_tol:
|
||||
row["ys"].append(y)
|
||||
row["y_mean"] = sum(row["ys"]) / len(row["ys"])
|
||||
row["words"].append(w)
|
||||
break
|
||||
else:
|
||||
rows.append({"y_mean": y, "ys": [y], "words": [w]})
|
||||
|
||||
# map to columns + join
|
||||
joined_rows = []
|
||||
for row in rows:
|
||||
rowdict = {canon: "" for canon, *_ in cols}
|
||||
words_by_col = {canon: [] for canon, *_ in cols}
|
||||
for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
|
||||
xmid = (w[0] + w[2]) / 2.0
|
||||
canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
|
||||
words_by_col[canon].append(w[4])
|
||||
for canon, toks in words_by_col.items():
|
||||
rowdict[canon] = _join_tokens(toks)
|
||||
if any(v for v in rowdict.values()):
|
||||
joined_rows.append(rowdict)
|
||||
return joined_rows
|
||||
|
||||
# ---------- top-of-form fields ----------
|
||||
p0 = doc[0]
|
||||
row1 = _extract_row_values_multiline(
|
||||
p0,
|
||||
["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
|
||||
y_window=22,
|
||||
)
|
||||
print(else_df)
|
||||
row2 = _extract_row_values_multiline(
|
||||
p0, ["Veranstaltung:", "Semester:"], y_window=20
|
||||
)
|
||||
|
||||
name_title = row1.get("Ihr Name und Titel:", "") or ""
|
||||
semap.subject = row1.get("Ihr Fach:", None)
|
||||
semap.phoneNumber = row1.get("Telefon:", None) # keep as-is (string like "682-308")
|
||||
semap.mail = row1.get("Mailadresse:", None)
|
||||
semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
|
||||
semap.personTitle = (
|
||||
",".join(name_title.split(",")[-1:]).strip() if name_title else None
|
||||
)
|
||||
|
||||
semap.title = row2.get("Veranstaltung:", None)
|
||||
semap.semester = row2.get("Semester:", None)
|
||||
|
||||
# ---------- table extraction (all pages) ----------
|
||||
cols, header_y = _compute_columns_from_headers(p0)
|
||||
all_rows: list[dict[str, Any]] = []
|
||||
for pn in range(len(doc)):
|
||||
all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y))
|
||||
|
||||
# drop the sub-header line "Nachname, Vorname" etc.
|
||||
filtered = []
|
||||
for r in all_rows:
|
||||
if r.get("Autorenname(n):Nachname, Vorname", "").strip() in (
|
||||
"",
|
||||
"Nachname, Vorname",
|
||||
):
|
||||
# skip if it's just the sub-header line
|
||||
if all(not r[c] for c in r if c != "Autorenname(n):Nachname, Vorname"):
|
||||
continue
|
||||
filtered.append(r)
|
||||
|
||||
# build Book objects (same filters as your word parser)
|
||||
booklist: list[Book] = []
|
||||
for row in filtered:
|
||||
b = Book()
|
||||
b.from_dict(row)
|
||||
if b.is_empty:
|
||||
continue
|
||||
if not b.has_signature:
|
||||
continue
|
||||
booklist.append(b)
|
||||
|
||||
semap.books = booklist
|
||||
|
||||
# keep parity with your post-processing
|
||||
if ai:
|
||||
_ = semap.renameSemester
|
||||
_ = semap.nameSetter
|
||||
|
||||
return semap
|
||||
|
||||
|
||||
# Manual smoke run: parse a local sample PDF when the module is executed as a script.
if __name__ == "__main__":
    else_df = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf")
    # print(else_df)
|
||||
|
||||
67
src/logic/xmlparser.py
Normal file
67
src/logic/xmlparser.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
|
||||
from src.logic.semester import Semester
|
||||
|
||||
|
||||
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """Parse the XML payload of a mail submission into an XMLMailSubmission.

    The document is expected to contain a ``<static>`` element holding the
    submitter data and a ``<books>`` element with one child per book.

    Args:
        xml_string: XML document as text.

    Returns:
        The populated submission. A book's ``year`` text of the form
        ``"<year>/<edition>"`` is split into year and edition; a book without
        year text gets ``year=None`` and ``edition=None``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not valid XML.
        TypeError, AttributeError, ValueError, IndexError: If the ``<static>``
            element or its ``telno`` / ``semester`` values are missing or
            malformed (unchanged, these fields are required).
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)

    static_info = {child.tag: child.text for child in root.find("static")}

    books_info = []
    books_element = root.find("books")
    # A submission without a <books> element is treated as having no books.
    for book_element in books_element if books_element is not None else ():
        details = {detail.tag: detail.text for detail in book_element}
        raw_year = details.get("year")
        # Element text is None for a missing or empty <year>; guard before "in".
        if raw_year and "/" in raw_year:
            year_parts = raw_year.split("/")
            year, edition = year_parts[0], year_parts[1]
        else:
            year, edition = raw_year, None
        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year,
                edition=edition,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )

    # Static (submitter / apparatus) data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    submission.telno = int(static_info.get("telno"))
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")

    # Semester text is "<term> <year>", split on whitespace.
    semester_parts = static_info.get("semester").split()
    sem_term, sem_year = semester_parts[0], semester_parts[1]
    submission.semester = Semester(semester=sem_term, year=int(sem_year))
    submission.books = books_info
    return submission
|
||||
|
||||
|
||||
def eml_parser(path: str) -> XMLMailSubmission:
    """Read an .eml file and parse the XML found after its header block.

    The header block ends at the first blank line; everything after it is
    printed and then passed to ``parse_xml_submission``.

    Args:
        path: Location of the UTF-8 encoded .eml file.

    Returns:
        The submission parsed from the message body.

    Raises:
        IndexError: If the file contains no blank line separating the headers.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw_message = handle.read()
    # Keep only the part after the first blank line (skips the mail headers).
    body = raw_message.split("\n\n", 1)[1]
    print("EML content loaded, parsing XML...")
    print(body)
    return parse_xml_submission(body)
|
||||
|
||||
|
||||
def eml_to_semap(xml_mail: str) -> SemapDocument:
    """Build a SemapDocument from an .eml submission file.

    Args:
        xml_mail: Path of the .eml file. It is handed to ``eml_parser``, which
            opens it as a file, so it is a path and not an already parsed
            submission.

    Returns:
        Document carrying the apparatus name and subject, the semester and
        the books of the submission.
    """
    submission = eml_parser(xml_mail)
    # NOTE: professor data (name, lastname, email) is not transferred yet.
    return SemapDocument(
        apparat=Apparat(name=submission.app_name, subject=submission.subject),
        semester=submission.semester,
        books=submission.books,
    )
|
||||
@@ -1,7 +1,11 @@
|
||||
from pyzotero import zotero
|
||||
from dataclasses import dataclass
|
||||
from src.logic.webrequest import WebRequest, BibTextTransformer
|
||||
from typing import Optional
|
||||
|
||||
from pyzotero import zotero
|
||||
|
||||
from src import settings
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -10,11 +14,11 @@ class Creator:
|
||||
lastName: str = None
|
||||
creatorType: str = "author"
|
||||
|
||||
def from_dict(self, data: dict):
|
||||
def from_dict(self, data: dict) -> None:
|
||||
for key, value in data.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def from_string(self, data: str):
|
||||
def from_string(self, data: str) -> "Creator":
|
||||
if "," in data:
|
||||
self.firstName = data.split(",")[1]
|
||||
self.lastName = data.split(",")[0]
|
||||
@@ -54,7 +58,7 @@ class Book:
|
||||
rights: str = None
|
||||
extra: str = None
|
||||
|
||||
def to_dict(self):
|
||||
def to_dict(self) -> dict:
|
||||
ret = {}
|
||||
for key, value in self.__dict__.items():
|
||||
if value:
|
||||
@@ -93,14 +97,14 @@ class BookSection:
|
||||
collections = list
|
||||
relations = dict
|
||||
|
||||
def to_dict(self):
|
||||
def to_dict(self) -> dict:
|
||||
ret = {}
|
||||
for key, value in self.__dict__.items():
|
||||
if value:
|
||||
ret[key] = value
|
||||
return ret
|
||||
|
||||
def assign(self, book):
|
||||
def assign(self, book) -> None:
|
||||
for key, value in book.__dict__.items():
|
||||
if key in self.__dict__.keys():
|
||||
try:
|
||||
@@ -140,14 +144,14 @@ class JournalArticle:
|
||||
collections = list
|
||||
relations = dict
|
||||
|
||||
def to_dict(self):
|
||||
def to_dict(self) -> dict:
|
||||
ret = {}
|
||||
for key, value in self.__dict__.items():
|
||||
if value:
|
||||
ret[key] = value
|
||||
return ret
|
||||
|
||||
def assign(self, book: dict):
|
||||
def assign(self, book: dict) -> None:
|
||||
for key, value in book.__dict__.items():
|
||||
if key in self.__dict__.keys():
|
||||
try:
|
||||
@@ -162,15 +166,15 @@ class ZoteroController:
|
||||
def __init__(self):
|
||||
if self.zoterocfg.library_id is None:
|
||||
return
|
||||
self.zot = zotero.Zotero(
|
||||
self.zot = zotero.Zotero( # type: ignore
|
||||
self.zoterocfg.library_id,
|
||||
self.zoterocfg.library_type,
|
||||
self.zoterocfg.api_key,
|
||||
)
|
||||
|
||||
def get_books(self):
|
||||
def get_books(self) -> list:
|
||||
ret = []
|
||||
items = self.zot.top()
|
||||
items = self.zot.top() # type: ignore
|
||||
for item in items:
|
||||
if item["data"]["itemType"] == "book":
|
||||
ret.append(item)
|
||||
@@ -178,17 +182,17 @@ class ZoteroController:
|
||||
|
||||
# create item in zotero
|
||||
# item is a part of a book
|
||||
def __get_data(self, isbn):
|
||||
def __get_data(self, isbn) -> dict:
|
||||
web = WebRequest()
|
||||
web.get_ppn(isbn)
|
||||
data = web.get_data_elsa()
|
||||
bib = BibTextTransformer("ARRAY")
|
||||
bib = BibTextTransformer()
|
||||
bib.get_data(data)
|
||||
book = bib.return_data()
|
||||
return book
|
||||
|
||||
# # print(zot.item_template("bookSection"))
|
||||
def createBook(self, isbn):
|
||||
# # #print(zot.item_template("bookSection"))
|
||||
def createBook(self, isbn) -> Book:
|
||||
book = self.__get_data(isbn)
|
||||
|
||||
bookdata = Book()
|
||||
@@ -204,26 +208,28 @@ class ZoteroController:
|
||||
authors = [
|
||||
Creator().from_string(author).__dict__ for author in book.author.split(";")
|
||||
]
|
||||
authors = [author for author in authors if author["lastName"] is not None]
|
||||
bookdata.creators = authors
|
||||
return bookdata
|
||||
|
||||
def createItem(self, item):
|
||||
resp = self.zot.create_items([item])
|
||||
def createItem(self, item) -> Optional[str]:
|
||||
resp = self.zot.create_items([item]) # type: ignore
|
||||
if "successful" in resp.keys():
|
||||
# print(resp["successful"]["0"]["key"])
|
||||
log.debug(resp)
|
||||
return resp["successful"]["0"]["key"]
|
||||
else:
|
||||
return None
|
||||
|
||||
def deleteItem(self, key):
|
||||
def deleteItem(self, key) -> None:
|
||||
items = self.zot.items()
|
||||
for item in items:
|
||||
if item["key"] == key:
|
||||
self.zot.delete_item(item)
|
||||
# print(item)
|
||||
self.zot.delete_item(item) # type: ignore
|
||||
# #print(item)
|
||||
break
|
||||
|
||||
def createHGSection(self, book: Book, data: dict):
|
||||
def createHGSection(self, book: Book, data: dict) -> Optional[str]:
|
||||
log.debug(book)
|
||||
chapter = BookSection()
|
||||
chapter.assign(book)
|
||||
chapter.pages = data["pages"]
|
||||
@@ -241,11 +247,11 @@ class ZoteroController:
|
||||
]
|
||||
chapter.creators += authors
|
||||
|
||||
# print(chapter.to_dict())
|
||||
log.debug(chapter.to_dict())
|
||||
return self.createItem(chapter.to_dict())
|
||||
pass
|
||||
|
||||
def createBookSection(self, book: Book, data: dict):
|
||||
def createBookSection(self, book: Book, data: dict) -> Optional[str]:
|
||||
chapter = BookSection()
|
||||
chapter.assign(book)
|
||||
chapter.pages = data["pages"]
|
||||
@@ -256,8 +262,8 @@ class ZoteroController:
|
||||
return self.createItem(chapter.to_dict())
|
||||
# chapter.creators
|
||||
|
||||
def createJournalArticle(self, journal, article):
|
||||
# print(type(article))
|
||||
def createJournalArticle(self, journal, article) -> Optional[str]:
|
||||
# #print(type(article))
|
||||
journalarticle = JournalArticle()
|
||||
journalarticle.assign(journal)
|
||||
journalarticle.itemType = "journalArticle"
|
||||
@@ -273,12 +279,12 @@ class ZoteroController:
|
||||
journalarticle.issue = article["issue"]
|
||||
journalarticle.url = article["isbn"]
|
||||
|
||||
# print(journalarticle.to_dict())
|
||||
# #print(journalarticle.to_dict())
|
||||
|
||||
return self.createItem(journalarticle.to_dict())
|
||||
|
||||
def get_citation(self, item):
|
||||
title = self.zot.item(
|
||||
def get_citation(self, item) -> str:
|
||||
title = self.zot.item( # type: ignore
|
||||
item,
|
||||
content="bib",
|
||||
style="deutsche-gesellschaft-fur-psychologie",
|
||||
@@ -319,16 +325,16 @@ if __name__ == "__main__":
|
||||
# if isinstance(publishers, str):
|
||||
# publishers = [publishers]
|
||||
# for publisher in publishers:
|
||||
# # print(publisher)
|
||||
# # #print(publisher)
|
||||
# creator = Creator().from_string(publisher)
|
||||
# creator.creatorType = "editor"
|
||||
# authors.append(creator.__dict__)
|
||||
|
||||
# chapter.creators = authors
|
||||
# chapter.publisher = book.publisher
|
||||
# # print(chapter.to_dict())
|
||||
# # #print(chapter.to_dict())
|
||||
# createBookSection(chapter.to_dict())
|
||||
# get_citation("9ZXH8DDE")
|
||||
# # # print()
|
||||
# # print(get_books())
|
||||
# # print(zot.item_creator_types("bookSection"))
|
||||
# # # #print()
|
||||
# # #print(get_books())
|
||||
# # #print(zot.item_creator_types("bookSection"))
|
||||
|
||||
13
src/parsers/__init__.py
Normal file
13
src/parsers/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
__all__ = [
|
||||
"csv_to_list",
|
||||
"pdf_to_csv",
|
||||
"word_to_semap",
|
||||
"eml_parser",
|
||||
"eml_to_semap",
|
||||
]
|
||||
|
||||
|
||||
from .csv_parser import csv_to_list
|
||||
from .pdf_parser import pdf_to_csv
|
||||
from .word_parser import word_to_semap
|
||||
from .xml_parser import eml_parser, eml_to_semap
|
||||
23
src/parsers/csv_parser.py
Normal file
23
src/parsers/csv_parser.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import csv
|
||||
|
||||
from charset_normalizer import detect
|
||||
|
||||
|
||||
def csv_to_list(path: str) -> list[str]:
    """Return the first column of a semicolon-separated CSV file.

    The file encoding is detected from the raw bytes. Rows are read with
    ``;`` as delimiter and ``|`` as quote character; double quotes are removed
    from each value. Rows without any field (blank lines) are skipped.

    Args:
        path: Location of the CSV file.

    Returns:
        One string per non-empty row: the row's first field without ``"``.
    """
    # Read the bytes inside a context manager so the handle is closed again.
    with open(path, "rb") as raw_file:
        encoding = detect(raw_file.read())["encoding"]

    with open(path, newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        return [row[0].replace('"', "") for row in reader if row]
|
||||
|
||||
|
||||
# Ad-hoc manual check against a local sample file.
if __name__ == "__main__":
    text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
    # remove linebreaks
    # #print(text)
|
||||
23
src/parsers/pdf_parser.py
Normal file
23
src/parsers/pdf_parser.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# add depend path to system path
|
||||
|
||||
from pdfquery import PDFQuery
|
||||
|
||||
|
||||
def pdf_to_csv(path: str) -> str:
    """Load a PDF with PDFQuery and return what its ``all_text`` query yields.

    Args:
        path: Location of the PDF file.

    Returns:
        The value stored under ``"all_text"`` in the extraction result.
    """
    pdf = PDFQuery(path)
    pdf.load()
    # Run the "*" selector with the text formatter and keep its result only.
    extraction = pdf.extract([("with_formatter", "text"), ("all_text", "*")])
    return extraction["all_text"]
|
||||
|
||||
|
||||
# Ad-hoc manual check: extract a sample PDF and flatten it to a single line.
if __name__ == "__main__":
    text = pdf_to_csv("54_pdf.pdf")
    # remove linebreaks
    text = text.replace("\n", "")
    # print(text)
|
||||
8
src/parsers/transformers/__init__.py
Normal file
8
src/parsers/transformers/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from .transformers import (
|
||||
RDS_AVAIL_DATA,
|
||||
ARRAYData,
|
||||
BibTeXData,
|
||||
COinSData,
|
||||
RDSData,
|
||||
RISData,
|
||||
)
|
||||
122
src/parsers/transformers/schemas.py
Normal file
122
src/parsers/transformers/schemas.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Any, List
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field as dataclass_field
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
class Item:
    """One item of an availability response; every field has an empty default."""

    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict):
        """Copy the key/value pairs of every entry in ``data["items"]`` onto self.

        Entries are applied in order, so a later entry overwrites attributes
        set by an earlier one. Returns ``self`` to allow chaining.
        """
        for record in data["items"]:
            for attr_name in record:
                setattr(self, attr_name, record[attr_name])
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_AVAIL_DATA:
    """Container for RDS availability data: a library sigil and its items."""

    library_sigil: str = dataclass_field(default_factory=str)
    items: List[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Fill the instance from a JSON string and return ``self``.

        The first top-level key is taken as the library sigil. Its value is
        iterated by location; each location yields one ``Item`` built from
        that location's data.
        """
        payload = json.loads(data)
        # The library sigil is the first key of the decoded object.
        self.library_sigil = str(list(payload.keys())[0])
        per_location = payload[self.library_sigil]
        for location in per_location:
            self.items.append(
                Item(superlocation=location).from_dict(per_location[location])
            )
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_DATA:
    """Container for one RDS record; every field defaults to an empty string."""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Set every key of ``data`` as an attribute on self and return self."""
        for attr_name in data:
            setattr(self, attr_name, data[attr_name])
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_GENERIC_DATA:
    """Library sigil together with the list of its RDS records."""

    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Fill the instance from a JSON string and return ``self``.

        The first top-level key is taken as the library sigil; every entry
        below it is imported into a new ``RDS_DATA`` record.
        """
        payload = json.loads(data)
        # The library sigil is the first key of the decoded object.
        self.LibrarySigil = str(list(payload.keys())[0])
        for entry in payload[self.LibrarySigil]:
            # Each entry is assumed to be a dict matching the RDS_DATA fields.
            record = RDS_DATA()
            record.import_from_dict(entry)
            self.RDS_DATA.append(record)
        return self
|
||||
|
||||
|
||||
@dataclass
class LoksatzData:
    """Record with optional string fields, all defaulting to ``None``.

    Presumably mirrors the keys of one ``loksatz`` record - confirm against
    the data source.
    """

    type: str | None = None
    adis_idn: str | None = None
    t_idn: str | None = None
    ktrl_nr: str | None = None
    adis_isil: str | None = None
    adis_sigel: str | None = None
    bib_sigel: str | None = None
    standort: str | None = None
    signatur: str | None = None
    ausleihcode: str | None = None
    sig_katalog: str | None = None
    erwerb_datum: str | None = None
    medientypcode: str | None = None
    bestellart: str | None = None
    faecherstatistik: str | None = None
    exemplar_stat: str | None = None
    so_standort: str | None = None
|
||||
522
src/parsers/transformers/transformers.py
Normal file
522
src/parsers/transformers/transformers.py
Normal file
@@ -0,0 +1,522 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field as dataclass_field
|
||||
from typing import Any, List
|
||||
|
||||
import loguru
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.logic.dataclass import BookData
|
||||
|
||||
# Module-level logger: previously registered sinks are removed, then stdout
# (level INFO) and a file under LOG_DIR (rotated at 1 MB, kept for 10 days)
# are added.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
|
||||
### Dataclass models
|
||||
@dataclass
class Item:
    """One item of an availability response; every field has an empty default."""

    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)
    ktrl_nr: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict):
        """Copy the key/value pairs of every entry in ``data["items"]`` onto self.

        Entries are applied in order, so a later entry overwrites attributes
        set by an earlier one. Returns ``self`` to allow chaining.
        """
        for record in data["items"]:
            for attr_name in record:
                setattr(self, attr_name, record[attr_name])
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_AVAIL_DATA:
    """Container for RDS availability data: a library sigil and its items."""

    library_sigil: str = dataclass_field(default_factory=str)
    items: List[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Fill the instance from a JSON string and return ``self``.

        The first top-level key is taken as the library sigil. Its value is
        iterated by location; each location yields one ``Item`` built from
        that location's data.
        """
        payload = json.loads(data)
        # The library sigil is the first key of the decoded object.
        self.library_sigil = str(list(payload.keys())[0])
        per_location = payload[self.library_sigil]
        for location in per_location:
            self.items.append(
                Item(superlocation=location).from_dict(per_location[location])
            )
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_DATA:
    """Container for one RDS record; every field defaults to an empty string."""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Set every key of ``data`` as an attribute on self and return self."""
        for attr_name in data:
            setattr(self, attr_name, data[attr_name])
        return self
|
||||
|
||||
|
||||
@dataclass
class RDS_GENERIC_DATA:
    """Library sigil together with the list of its RDS records."""

    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Fill the instance from a JSON string and return ``self``.

        The first top-level key is taken as the library sigil; every entry
        below it is imported into a new ``RDS_DATA`` record.
        """
        payload = json.loads(data)
        # The library sigil is the first key of the decoded object.
        self.LibrarySigil = str(list(payload.keys())[0])
        for entry in payload[self.LibrarySigil]:
            # Each entry is assumed to be a dict matching the RDS_DATA fields.
            record = RDS_DATA()
            record.import_from_dict(entry)
            self.RDS_DATA.append(record)
        return self
|
||||
|
||||
|
||||
class BaseStruct:
    """Plain attribute bag: every keyword argument becomes an attribute."""

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])
|
||||
|
||||
|
||||
class ARRAYData:
    """Transformer for the "ARRAY" text dump of a catalogue record.

    The input is searched for markers such as ``[kid]``, ``[isbn]`` or
    ``[loksatz] => Array (`` (the layout looks like PHP ``print_r`` output -
    confirm against the data source).
    """

    def __init__(self, signature=None) -> None:
        """Store an optional signature that overrides the one found in the data.

        Args:
            signature: Signature to use for the resulting book. When ``None``
                the signature is read from the first ``loksatz`` record.
        """
        # Previously the argument was ignored and None was stored unconditionally.
        self.signature = signature

    def transform(self, data: str) -> BookData:
        """Extract the bibliographic fields from ``data`` and build a BookData.

        Args:
            data: Raw text dump of one record.

        Returns:
            BookData with ppn, title, author, edition, link, isbn, language,
            publisher, year, pages, signature, place, adis_idn and in_apparat.
        """

        def _get_line(source: str, search: str) -> str:
            """Return the rest of the line after ``search`` without ``=>``."""
            try:
                return (
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .replace("=>", "")
                    .strip()
                )
            except Exception:
                log.exception(f"ARRAYData.transform failed, no string {search}")
                return ""

        def _get_list_entry(source: str, search: str, entry: str) -> str:
            """Return element ``entry`` of the array that follows ``search``."""
            try:
                source = source.replace("\t", "").replace("\r", "")
                # The array body ends at the first closing parenthesis.
                source = source.split(search)[1].split(")")[0]
                return _get_line(source, entry).replace("=>", "").strip()
            except Exception:
                return ""

        def _get_isbn(source: str) -> list:
            """Return the distinct, non-empty values of the ``[isbn]`` array."""
            try:
                block = source.split("[isbn]")[1].split(")")[0].strip()
                block = block.split("(")[1]
                found = []
                for chunk in block.split("=>"):
                    # Only the text up to the line end belongs to the value.
                    candidate = chunk.split("\n")[0].strip()
                    if candidate and candidate not in found:
                        found.append(candidate)
                return found
            except Exception:
                return []

        def _get_signature(source):
            """Return ``signatur`` of the first loksatz record, or None."""
            try:
                sig_data = (
                    source.split("[loksatz]")[1]
                    .split("[0] => ")[1]
                    .split("\n")[0]
                    .strip()
                )
                # SECURITY: eval() runs on text taken from external data; if
                # the record is plain JSON, json.loads would be the safe choice.
                signature_data = eval(sig_data)
                return signature_data["signatur"]
            except Exception:
                return None

        def _get_author(source):
            """Return the relevant names of ``[au_display_short]`` joined by ';'.

            When both editors ([HerausgeberIn]) and authors ([VerfasserIn])
            occur, the editors are returned; when only authors occur, the
            authors are returned; otherwise the result is empty.
            """
            try:
                array = source.split("[au_display_short]")[1].split(")\n")[0].strip()
            except Exception:
                return ""
            lines = [
                entry.split("=>")[1].strip()
                for entry in array.split("\n")
                if "=>" in entry
            ]
            hg_present = any("[HerausgeberIn]" in line for line in lines)
            verf_present = any("[VerfasserIn]" in line for line in lines)
            if hg_present and verf_present:
                wanted_role = "[HerausgeberIn]"
            elif verf_present:
                wanted_role = "[VerfasserIn]"
            else:
                return ""
            return ";".join(
                line.split("[")[0].strip() for line in lines if wanted_role in line
            )

        def _get_title(source):
            """Return the title before the first '/', preferring ``[ti_long_f]``."""
            title = ""
            # Checked in this order so that [ti_long_f] wins when both exist.
            for marker in ("[ti_long]", "[ti_long_f]"):
                if marker in source:
                    titledata = source.split(marker)[1].split(")\n")[0].strip()
                    title = titledata.split("=>")[1].strip().split("/")[0].strip()
            return title

        def _loksatz_records(source):
            """Yield the evaluated ``{...}`` objects of the loksatz section."""
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)", source, re.DOTALL
            )
            if not loksatz_match:
                return
            for obj in re.findall(r"{.*?}", loksatz_match.group(1), re.DOTALL):
                # SECURITY: eval() runs on text taken from external data.
                yield eval(obj)

        def _get_adis_idn(source, signature):
            """Return ``adis_idn`` of the record with ``signature``, else None."""
            for record in _loksatz_records(source):
                if record["signatur"] == signature:
                    return record["adis_idn"]
            return None

        def _get_in_apparat(source):
            """Tell whether the first loksatz record has code R at location 40.

            Returns None when there is no loksatz record.
            """
            for record in _loksatz_records(source):
                # NOTE(review): only the first record is inspected because the
                # loop returns on its first iteration - confirm this is intended.
                return record["ausleihcode"] == "R" and record["standort"] == "40"
            return None

        ppn = _get_line(data, "[kid]")
        title = _get_title(data).strip()
        author = _get_author(data)
        edition = _get_list_entry(data, "[ausgabe]", "[0]").replace(",", "")
        link = f"https://rds.ibs-bw.de/phfreiburg/link?kid={ppn}"
        isbn = _get_isbn(data)
        language = _get_list_entry(data, "[la_facet]", "[0]")
        publisher = _get_list_entry(data, "[pu]", "[0]")
        year = _get_list_entry(data, "[py_display]", "[0]")
        pages = _get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip()
        signature = (
            self.signature if self.signature is not None else _get_signature(data)
        )
        place = _get_list_entry(data, "[pp]", "[0]")
        adis_idn = _get_adis_idn(data, signature=signature)
        in_apparat = _get_in_apparat(data)
        return BookData(
            ppn=ppn,
            title=title,
            author=author,
            edition=edition,
            link=link,
            isbn=isbn,
            language=language,
            publisher=publisher,
            year=year,
            pages=pages,
            signature=signature,
            place=place,
            adis_idn=adis_idn,
            in_apparat=in_apparat,
        )
|
||||
|
||||
|
||||
class COinSData:
    """Transformer for COinS (``rft.*`` key/value) metadata strings."""

    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        """Build a BookData from the ``rft.*`` / ``rft_id`` fields in ``data``.

        Raises:
            IndexError: If the ``rft_id`` value contains no ``=``.
        """

        def _get_line(source: str, search: str) -> str:
            """Return the value after ``<search>=`` up to the next ``rft``."""
            try:
                value = source.split(f"{search}=")[1]
                return value.split("rft")[0].strip() if "rft" in value else value
            except Exception:
                # Key not present (or source unusable): treat as empty value.
                return ""

        return BookData(
            ppn=_get_line(data, "rft_id").split("=")[1],
            title=_get_line(data, "rft.btitle"),
            author=f"{_get_line(data, 'rft.aulast')}, {_get_line(data, 'rft.aufirst')}",
            edition=_get_line(data, "rft.edition"),
            link=_get_line(data, "rft_id"),
            isbn=_get_line(data, "rft.isbn"),
            publisher=_get_line(data, "rft.pub"),
            year=_get_line(data, "rft.date"),
            pages=_get_line(data, "rft.tpages").split(":")[0].strip(),
        )
|
||||
|
||||
|
||||
class RISData:
    """Build a ``BookData`` record from text whose fields appear as ``TAG - value`` lines."""

    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        """Extract the tagged fields from ``data`` and return them as ``BookData``.

        Args:
            data: Raw text with one ``TAG - value`` entry per line.

        Returns:
            A ``BookData`` with ppn, title, signature, edition, link, isbn
            (a list produced by splitting on commas), author, language,
            publisher, year and pages. Missing tags yield empty strings.

        Raises:
            IndexError: If no ``DP`` value containing ``=`` is present,
                because the PPN is the text after that ``=``.
        """

        def _get_line(source: str, search: str) -> str:
            """Return the rest of the line that follows ``search - ``."""
            try:
                tail = source.split(f"{search} - ")[1]
            except (IndexError, AttributeError, TypeError):
                # Tag not present, or source is not a string: best-effort empty value.
                return ""
            return tail.split("\n")[0].strip() if "\n" in tail else tail

        return BookData(
            ppn=_get_line(data, "DP").split("=")[1],
            title=_get_line(data, "TI"),
            signature=_get_line(data, "CN"),
            edition=_get_line(data, "ET").replace(",", ""),
            link=_get_line(data, "DP"),
            isbn=_get_line(data, "SN").split(","),
            # Drop anything from the first "[" onwards in the author entry.
            author=_get_line(data, "AU").split("[")[0].strip(),
            language=_get_line(data, "LA"),
            publisher=_get_line(data, "PB"),
            year=_get_line(data, "PY"),
            pages=_get_line(data, "SP"),
        )
|
||||
|
||||
|
||||
class BibTeXData:
    """Build a ``BookData`` record from a BibTeX-style entry."""

    def __init__(self):
        pass

    def transform(self, data: str) -> BookData:
        """Extract the ``key = {value}`` fields from ``data`` as ``BookData``.

        Args:
            data: Raw entry text with one ``key = value`` assignment per line.

        Returns:
            A ``BookData`` with title, signature (read from ``bestand``),
            edition, isbn, author (``" and "`` separators replaced by ``;``),
            language, publisher, year and pages. ``ppn`` is always ``None``.
            Fields that cannot be found are empty strings.
        """

        def _get_line(source: str, search: str) -> str:
            """Return the cleaned value assigned on the first line mentioning ``search``."""
            try:
                return (
                    # Read from the argument rather than the enclosing ``data``.
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .split("=")[1]
                    .strip()
                    .replace("{", "")
                    .replace("}", "")
                    .replace(",", "")
                    .replace("[", "")
                    .replace("];", "")
                )
            except (IndexError, AttributeError, TypeError):
                # Field missing, no "=" on its line, or source is not a string.
                return ""

        return BookData(
            ppn=None,
            title=_get_line(data, "title"),
            signature=_get_line(data, "bestand"),
            edition=_get_line(data, "edition"),
            isbn=_get_line(data, "isbn"),
            author=";".join(_get_line(data, "author").split(" and ")),
            language=_get_line(data, "language"),
            publisher=_get_line(data, "publisher"),
            year=_get_line(data, "year"),
            pages=_get_line(data, "pages"),
        )
|
||||
|
||||
|
||||
class RDSData:
    """Split an RDS text dump into an availability part and a generic-data part."""

    def __init__(self):
        # Each transform() call appends [availability, generic data] to this list.
        self.retlist = []

    def transform(self, data: str):
        """Parse ``data`` and store the resulting objects on the instance.

        Args:
            data: Raw text containing the
                ``RDS ----------------------------------`` marker.

        Returns:
            ``self``, so that ``return_data`` can be chained.

        Raises:
            IndexError: If the marker is missing or the section after it has
                too few lines.
        """
        # NOTE(review): prints the complete raw payload; looks like debug output.
        print(data)

        def _get_raw_data(raw: str) -> list:
            """Return ``[first payload line, remaining lines joined without separator]``."""
            section = raw.split("RDS ----------------------------------")[1].strip()
            # Drop the first nine lines of the section, then one further line.
            lines = section.split("\n", 9)[9].split("\n")[1:]
            return [lines[0], "".join(lines[1:])]

        ret_data = _get_raw_data(data)
        # The second entry feeds RDS_GENERIC_DATA, the first feeds RDS_AVAIL_DATA.
        # NOTE(review): the earlier inline comments stated the opposite mapping;
        # the code is left as it was -- confirm which one is intended.
        self.rds_data = RDS_GENERIC_DATA().import_from_dict(ret_data[1])
        self.rds_availability = RDS_AVAIL_DATA().import_from_dict(ret_data[0])
        self.retlist.append(self.rds_availability)
        self.retlist.append(self.rds_data)
        return self

    def return_data(self, option=None):
        """Return the result of the most recent ``transform`` call.

        Args:
            option: ``"rds_availability"`` or ``"rds_data"`` to get one object;
                anything else returns both in a dict keyed by those names.

        Raises:
            IndexError: If ``transform`` has not stored a result pair yet.
        """
        # Read from the end of the list: reusing one instance for several
        # transform() calls must not keep returning the first result.
        availability, generic = self.retlist[-2], self.retlist[-1]
        if option == "rds_availability":
            return availability
        if option == "rds_data":
            return generic
        return {"rds_availability": availability, "rds_data": generic}
|
||||
|
||||
|
||||
class DictToTable:
    """Translate a mode-specific form dictionary into a normalized citation dict."""

    # Instance attributes in the order they are created.
    _ATTRIBUTES = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "type",
        "pages",
        "issue",
        "isbn",
    )
    # Key order of the dictionary produced by makeResult().
    _RESULT_KEYS = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "issue",
        "pages",
        "isbn",
        "type",
    )
    # (attribute, input key) pairs per mode, in assignment order.
    _BOOK_FIELDS = (
        ("work_author", "book_author"),
        ("signature", "book_signature"),
        ("location", "book_place"),
        ("year", "book_year"),
        ("work_title", "book_title"),
        ("edition", "book_edition"),
        ("pages", "book_pages"),
        ("publisher", "book_publisher"),
        ("isbn", "book_isbn"),
    )
    _HG_FIELDS = (
        ("section_author", "hg_author"),
        ("work_author", "hg_editor"),
        ("year", "hg_year"),
        ("work_title", "hg_title"),
        ("publisher", "hg_publisher"),
        ("location", "hg_place"),
        ("edition", "hg_edition"),
        ("chapter_title", "hg_chaptertitle"),
        ("pages", "hg_pages"),
        ("signature", "hg_signature"),
        ("isbn", "hg_isbn"),
    )
    _ZS_FIELDS = (
        ("section_author", "zs_author"),
        ("chapter_title", "zs_chapter_title"),
        ("location", "zs_place"),
        ("issue", "zs_issue"),
        ("pages", "zs_pages"),
        ("publisher", "zs_publisher"),
        ("isbn", "zs_isbn"),
        ("year", "zs_year"),
        ("signature", "zs_signature"),
        ("work_title", "zs_title"),
    )

    def __init__(self):
        for attribute in self._ATTRIBUTES:
            setattr(self, attribute, None)

    def makeResult(self):
        """Return all attributes that are not ``None`` as a dictionary."""
        result = {}
        for key in self._RESULT_KEYS:
            value = getattr(self, key)
            if value is not None:
                result[key] = value
        return result

    def reset(self):
        """Set every instance attribute back to ``None``."""
        for attribute in vars(self):
            setattr(self, attribute, None)

    def transform(self, data: dict):
        """Dispatch on ``data["mode"]`` (``book``, ``hg`` or ``zs``).

        Returns the normalized dictionary, or ``None`` for any other mode.
        The instance is reset before the mode is evaluated.
        """
        mode = data["mode"]
        self.reset()
        for name, handler in (
            ("book", self.book_assign),
            ("hg", self.hg_assign),
            ("zs", self.zs_assign),
        ):
            if mode == name:
                return handler(data)
        return None

    def _assign(self, kind, pairs, data):
        """Set ``type`` to ``kind``, copy ``data[key]`` into each attribute, build the result."""
        self.type = kind
        for attribute, key in pairs:
            setattr(self, attribute, data[key])
        return self.makeResult()

    def book_assign(self, data):
        """Fill the attributes from the ``book_*`` keys of ``data``."""
        return self._assign("book", self._BOOK_FIELDS, data)

    def hg_assign(self, data):
        """Fill the attributes from the ``hg_*`` keys of ``data``."""
        return self._assign("hg", self._HG_FIELDS, data)

    def zs_assign(self, data):
        """Fill the attributes from the ``zs_*`` keys of ``data``."""
        return self._assign("zs", self._ZS_FIELDS, data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: parse the local file "daiadata" and pick the availability part.
    with open("daiadata", "r") as f:
        data = f.read()

    ret = RDSData().transform(data)
    data = ret.return_data(option="rds_availability")
    # log.debug(data)
|
||||
373
src/parsers/word_parser.py
Normal file
373
src/parsers/word_parser.py
Normal file
@@ -0,0 +1,373 @@
|
||||
import zipfile
|
||||
from typing import Any, Optional
|
||||
|
||||
import fitz # PyMuPDF
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
from docx import Document
|
||||
|
||||
from src.core.models import Book, SemapDocument
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
    """Read every table of a .docx file into a DataFrame.

    Line breaks inside cells are removed. The first row of each table becomes
    the column labels and is dropped from the data.

    Args:
        path: Path of the Word document.

    Returns:
        One DataFrame per table, in document order.
    """
    frames = []
    for table in Document(path).tables:
        cells: list[list[Any]] = [
            [cell.text.replace("\n", "") for cell in row.cells]
            for row in table.rows
        ]
        frame = pd.DataFrame(cells)
        # Promote the first row to the header, keep the rest as data.
        frame.columns = frame.iloc[0]
        frames.append(frame.iloc[1:])
    return frames
|
||||
|
||||
|
||||
def get_fach(path: str) -> Optional[str]:
    """Return the text of the first non-empty run in paragraph ``12456A32``.

    The .docx file is opened as a zip archive and ``word/document.xml`` is
    searched for the ``w:p`` element whose ``w14:paraId`` is ``12456A32``
    (presumably the "Ihr Fach" field of the form -- confirm against the
    template).

    Args:
        path: Path of the Word document.

    Returns:
        The first content node of the first ``w:t`` that has content inside
        that paragraph, or ``None`` when nothing matches.
    """
    # The context manager closes the archive even if reading the member fails.
    with zipfile.ZipFile(path) as archive:
        xml_data = archive.read("word/document.xml")

    soup = BeautifulSoup(xml_data, "xml")
    for para in soup.find_all("w:p"):
        if para.get("w14:paraId") != "12456A32":
            continue
        for run in para.find_all("w:r"):
            text_node = run.find("w:t")
            if text_node and text_node.contents:
                return text_node.contents[0]
    return None
|
||||
|
||||
|
||||
def makeDict() -> dict[str, Optional[str]]:
    """Return a fresh citation dictionary with every field set to ``None``."""
    field_names = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "issue",
        "pages",
        "isbn",
        "type",
    )
    return dict.fromkeys(field_names)
|
||||
|
||||
|
||||
def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
    """Convert positional table rows into citation dictionaries.

    Args:
        tlist: Rows to convert; each row is indexed by position.
        type: Document type. ``Monografien``, ``Herausgeberwerke`` and
            ``Zeitschriftenaufsätze`` have a column layout; for any other
            value the rows produce dictionaries with all fields ``None``.

    Returns:
        One dictionary (as created by ``makeDict``) per row.
    """
    # (field, row position) per document type, in assignment order.
    layouts = (
        (
            "Monografien",
            (
                ("work_author", 0),
                ("year", 1),
                ("edition", 2),
                ("work_title", 3),
                ("location", 4),
                ("publisher", 5),
                ("signature", 6),
                ("pages", 7),
            ),
        ),
        (
            "Herausgeberwerke",
            (
                ("section_author", 0),
                ("year", 1),
                ("edition", 2),
                ("chapter_title", 3),
                ("work_author", 4),
                ("work_title", 5),
                ("location", 6),
                ("publisher", 7),
                ("signature", 9),
                ("pages", 8),
            ),
        ),
        (
            "Zeitschriftenaufsätze",
            (
                ("section_author", 0),
                ("year", 1),
                ("issue", 2),
                ("chapter_title", 3),
                ("work_title", 4),
                ("location", 5),
                ("publisher", 6),
                ("signature", 8),
                ("pages", 7),
            ),
        ),
    )
    columns = next((fields for name, fields in layouts if type == name), None)

    converted: list[dict[str, Optional[str]]] = []
    for line in tlist:
        entry = makeDict()
        if columns is not None:
            entry["type"] = type
            for field, position in columns:
                entry[field] = line[position]
        converted.append(entry)
    return converted
|
||||
|
||||
|
||||
def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
    """Read the first table of a Word form and convert its rows to dictionaries.

    The document type is the text of the last non-empty paragraph. All tables
    are parsed (line breaks and U+2002 removed from the cells, first row used
    as header), but only the first table is converted. Rows equal to the
    all-empty placeholder tuple of the document type are dropped.

    Args:
        path: Path of the Word document.

    Returns:
        ``(rows converted by tuple_to_dict, document type)``.
    """
    doc = Document(path)
    non_empty_paragraphs = [para.text for para in doc.paragraphs if para.text != ""]
    doctype = non_empty_paragraphs[-1]
    # An untouched form row per document type: nothing but empty cells.
    tuples = {
        "Monografien": ("", "", "", "", "", "", "", "", ""),
        "Herausgeberwerke": ("", "", "", "", "", "", "", "", "", "", ""),
        "Zeitschriftenaufsätze": ("", "", "", "", "", "", "", "", "", ""),
    }

    frames: list[pd.DataFrame] = []
    for table in doc.tables:
        cells: list[list[str]] = [
            [cell.text.replace("\n", "").replace("\u2002", "") for cell in row.cells]
            for row in table.rows
        ]
        frame = pd.DataFrame(cells)
        frame.columns = frame.iloc[0]
        frames.append(frame.iloc[1:])

    rows = []
    for row in frames[0].itertuples(index=False, name=None):
        if row != tuples[doctype]:
            rows.append(row)
    # log.debug(data)
    return tuple_to_dict(rows, doctype), doctype
|
||||
|
||||
|
||||
def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
    """Parse a Semesterapparat Word form into a ``SemapDocument``.

    The first two tables hold the form fields as alternating label/value
    column headers; the third table lists the books.

    Args:
        word_path: Path of the Word document.
        ai: When true, the ``renameSemester`` and ``nameSetter`` attributes of
            the document are accessed after the form fields are read
            (presumably properties with side effects -- confirm against
            ``SemapDocument``).

    Returns:
        The populated document. Only books that are non-empty and have a
        signature are kept.

    Raises:
        KeyError: If an expected form label is missing.
        IndexError: If the document has fewer than three tables.
    """
    log.info("Parsing Word Document {}", word_path)
    semap = SemapDocument()
    df = word_docx_to_csv(word_path)

    def _label_value_pairs(frame) -> dict:
        """Map each even-positioned column label to the label right after it."""
        keys = list(frame.to_dict().keys())
        # Stop before a trailing unpaired label so keys[i + 1] always exists.
        return {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}

    appdata = _label_value_pairs(df[0])
    semap.phoneNumber = appdata["Telefon:"]
    semap.subject = appdata["Ihr Fach:"]
    semap.mail = appdata["Mailadresse:"]
    # Everything before the last comma is the name, the remainder is the title.
    name_parts = appdata["Ihr Name und Titel:"].split(",")
    semap.personName = ",".join(name_parts[:-1])
    semap.personTitle = name_parts[-1].strip()

    appdata = _label_value_pairs(df[1])
    semap.title = appdata["Veranstaltung:"]
    semap.semester = appdata["Semester:"]
    if ai:
        # Attribute access only; the values themselves are not needed here.
        _ = semap.renameSemester
        _ = semap.nameSetter

    books = df[2]
    booklist = []
    for i in range(len(books)):
        row = books.iloc[i]
        if row.isnull().all():
            continue
        book = Book()
        book.from_dict(row.to_dict())
        if book.is_empty or not book.has_signature:
            continue
        booklist.append(book)
    log.info("Found {} books", len(booklist))
    semap.books = booklist
    return semap
|
||||
|
||||
|
||||
def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
    """
    Parse a Semesterapparat PDF form and return it as a SemapDocument.

    - No external programs, only PyMuPDF.
    - Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells.
    - Works across multiple pages; headers only need to exist on the first page.

    Args:
        pdf_path: Path of the PDF file.
        ai: When true, the ``renameSemester`` and ``nameSetter`` attributes of
            the document are accessed after parsing (presumably properties
            with side effects -- confirm against ``SemapDocument``).

    Returns:
        The populated document. Only books that are non-empty and have a
        signature are kept.

    Raises:
        ValueError: If no table header is found on the first page while a page
            still has words in the table area (``min`` over no columns).
    """

    # ---------- helpers ----------
    def _join_tokens(tokens: list[str]) -> str:
        """Join tokens, preserving hyphen/URL joins across line wraps."""
        parts = []
        for tok in tokens:
            if parts and parts[-1].endswith(("-", "/", ":")):
                parts[-1] = parts[-1] + tok  # no space after '-', '/' or ':'
            else:
                parts.append(tok)
        return " ".join(parts).strip()

    def _extract_row_values_multiline(
        page, labels: list[str], y_window: float = 24
    ) -> dict[str, str]:
        """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
        rects = []
        for lab in labels:
            hits = page.search_for(lab)
            if hits:
                rects.append((lab, hits[0]))
        if not rects:
            return {}

        rects.sort(key=lambda t: t[1].x0)
        words = page.get_text("words")
        out = {}
        for i, (lab, r) in enumerate(rects):
            # Value area: from the label's right edge to the next label
            # (or near the page edge), within a vertical window at the label.
            x0 = r.x1 + 1
            x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
            y0 = r.y0 - 3
            y1 = r.y0 + y_window
            toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
            toks.sort(key=lambda w: (w[1], w[0]))  # line, then x
            out[lab] = _join_tokens([w[4] for w in toks])
        return out

    def _compute_columns_from_headers(page0):
        """Find column headers (once) and derive column centers + header baseline."""
        headers = [
            ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
            ("Jahr/Auflage", "Jahr/Auflage"),
            ("Titel", "Titel"),
            ("Ort und Verlag", "Ort und Verlag"),
            ("Standnummer", "Standnummer"),
            ("Interne Vermerke", "Interne Vermerke"),
        ]
        found = []
        for label, canon in headers:
            rects = [
                r for r in page0.search_for(label) if r.y0 > 200
            ]  # skip top-of-form duplicates
            if rects:
                found.append((canon, rects[0]))
        found.sort(key=lambda t: t[1].x0)
        cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
        header_y = min(r.y0 for _, r in found) if found else 0
        return cols, header_y

    def _extract_table_rows_from_page(
        page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
    ):
        """
        Group words into logical rows (tolerant to wrapped lines), then map each word
        to the nearest column by x-center and join tokens per column.
        """
        words = [
            w
            for w in page.get_text("words")
            if w[1] > header_y + y_top_margin
            and w[3] < page.rect.height - y_bottom_margin
        ]

        # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
        rows = []
        for w in sorted(words, key=lambda w: w[1]):
            y = w[1]
            for row in rows:
                if abs(row["y_mean"] - y) <= y_tol:
                    row["ys"].append(y)
                    row["y_mean"] = sum(row["ys"]) / len(row["ys"])
                    row["words"].append(w)
                    break
            else:
                # No existing band is close enough: this word starts a new row.
                rows.append({"y_mean": y, "ys": [y], "words": [w]})

        # map to columns + join
        joined_rows = []
        for row in rows:
            rowdict = {canon: "" for canon, *_ in cols}
            words_by_col = {canon: [] for canon, *_ in cols}
            for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
                xmid = (w[0] + w[2]) / 2.0
                canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
                words_by_col[canon].append(w[4])
            for canon, toks in words_by_col.items():
                rowdict[canon] = _join_tokens(toks)
            if any(v for v in rowdict.values()):
                joined_rows.append(rowdict)
        return joined_rows

    semap = SemapDocument()
    doc = fitz.open(pdf_path)
    try:
        # ---------- top-of-form fields ----------
        p0 = doc[0]
        row1 = _extract_row_values_multiline(
            p0,
            ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
            y_window=22,
        )
        row2 = _extract_row_values_multiline(
            p0, ["Veranstaltung:", "Semester:"], y_window=20
        )

        # ---------- table extraction (all pages) ----------
        cols, header_y = _compute_columns_from_headers(p0)
        all_rows: list[dict[str, Any]] = []
        for page in doc:
            all_rows.extend(_extract_table_rows_from_page(page, cols, header_y))
    finally:
        # Everything needed has been copied into plain Python objects by now.
        doc.close()

    name_title = row1.get("Ihr Name und Titel:", "") or ""
    semap.subject = row1.get("Ihr Fach:", None)
    semap.phoneNumber = row1.get("Telefon:", None)  # keep as-is (string like "682-308")
    semap.mail = row1.get("Mailadresse:", None)
    semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
    semap.personTitle = (
        ",".join(name_title.split(",")[-1:]).strip() if name_title else None
    )

    semap.title = row2.get("Veranstaltung:", None)
    semap.semester = row2.get("Semester:", None)

    # drop the sub-header line "Nachname, Vorname" etc.
    author_col = "Autorenname(n):Nachname, Vorname"
    filtered = []
    for r in all_rows:
        if r.get(author_col, "").strip() in ("", "Nachname, Vorname"):
            # skip if it's just the sub-header line (no other column has text)
            if all(not r[c] for c in r if c != author_col):
                continue
        filtered.append(r)

    # build Book objects (same filters as the Word parser)
    booklist: list[Book] = []
    for row in filtered:
        b = Book()
        b.from_dict(row)
        if b.is_empty:
            continue
        if not b.has_signature:
            continue
        booklist.append(b)

    semap.books = booklist

    # keep parity with the Word parser's post-processing
    if ai:
        _ = semap.renameSemester
        _ = semap.nameSetter

    return semap
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc manual run against a developer-local sample file.
    else_df = pdf_to_semap(pdf_path="C:/Users/aky547/Dokumente/testsemap.pdf")
    # print(else_df)
|
||||
67
src/parsers/xml_parser.py
Normal file
67
src/parsers/xml_parser.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission
|
||||
from src.core.semester import Semester
|
||||
|
||||
|
||||
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """
    Parse an XML string representing a mail submission and return an XMLMailSubmission object.

    The ``<static>`` element supplies the submitter and course fields, and each
    child of ``<books>`` becomes one ``BookData``. A book's ``<year>`` text is
    split at ``/`` into year and edition.

    Args:
        xml_string: The XML document as text.

    Returns:
        The populated submission.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not well-formed.
        TypeError: If ``<static>``, ``<books>`` or the ``telno`` value is missing.
        ValueError: If ``telno`` or the semester year is not an integer.
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)
    static_info = {child.tag: child.text for child in root.find("static")}

    books_info = []
    for book_el in root.find("books"):
        details = {detail.tag: detail.text for detail in book_el}
        # An empty or absent <year> yields None; it must not be searched for "/".
        raw_year = details.get("year")
        if raw_year and "/" in raw_year:
            year_parts = raw_year.split("/")
            year, edition = year_parts[0], year_parts[1]
        else:
            year, edition = raw_year, None
        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year,
                edition=edition,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )

    # Extract static data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    # NOTE(review): int() rejects numbers written with separators such as "682-308".
    submission.telno = int(static_info.get("telno"))
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")
    # The semester text is "<term> <year>", separated by whitespace.
    semester_parts = static_info.get("semester").split()
    sem_year = semester_parts[1]
    sem_term = semester_parts[0]
    submission.semester = Semester(semester=sem_term, year=int(sem_year))
    submission.books = books_info
    return submission
|
||||
|
||||
|
||||
def eml_parser(path: str) -> XMLMailSubmission:
    """Read a mail file, drop everything up to the first blank line, parse the rest as XML.

    Args:
        path: Path of the UTF-8 encoded mail file.

    Returns:
        The submission built by ``parse_xml_submission``.

    Raises:
        IndexError: If the file contains no blank line.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw_message = handle.read()
    # Skip headers
    xml_content = raw_message.split("\n\n", 1)[1]
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)
|
||||
|
||||
|
||||
def eml_to_semap(xml_mail: XMLMailSubmission) -> SemapDocument:
    """Parse a mail submission and wrap it in a ``SemapDocument``.

    NOTE(review): ``xml_mail`` is handed to ``eml_parser``, which opens its
    argument as a file path, so callers apparently have to pass a path
    despite the annotation -- confirm and correct the annotation.
    """
    parsed = eml_parser(xml_mail)
    apparat = Apparat(name=parsed.app_name, subject=parsed.subject)
    return SemapDocument(
        # prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email),
        apparat=apparat,
        semester=parsed.semester,
        books=parsed.books,
    )
|
||||
16
src/services/__init__.py
Normal file
16
src/services/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""External service integrations and API clients."""
|
||||
|
||||
from .catalogue import Catalogue
|
||||
from .sru import SWB
|
||||
from .lehmanns import LehmannsClient
|
||||
from .zotero import ZoteroController
|
||||
from .webrequest import BibTextTransformer, WebRequest
|
||||
|
||||
# Names exported by ``from <package> import *``; each one is imported above.
__all__ = [
    "Catalogue",
    "SWB",
    "LehmannsClient",
    "ZoteroController",
    "BibTextTransformer",
    "WebRequest",
]
|
||||
292
src/services/catalogue.py
Normal file
292
src/services/catalogue.py
Normal file
@@ -0,0 +1,292 @@
|
||||
from typing import List
|
||||
|
||||
import regex
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.core.models import BookData as Book
|
||||
from src.shared.logging import log
|
||||
|
||||
# Search URL template: the single "{}" placeholder receives the term for the
# "allfields" search field; every other search field is sent empty.
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
# Scheme and host that are prepended to the relative links of search results.
BASE = "https://rds.ibs-bw.de"
|
||||
|
||||
|
||||
class Catalogue:
|
||||
def __init__(self, timeout=15):
|
||||
self.timeout = timeout
|
||||
reachable = self.check_connection()
|
||||
if not reachable:
|
||||
log.error("No internet connection available.")
|
||||
raise ConnectionError("No internet connection available.")
|
||||
|
||||
def check_connection(self):
|
||||
try:
|
||||
response = requests.get("https://www.google.com", timeout=self.timeout)
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Could not connect to google.com: {e}")
|
||||
|
||||
def search_book(self, searchterm: str):
|
||||
response = requests.get(URL.format(searchterm), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def search(self, link: str):
|
||||
response = requests.get(link, timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def get_book_links(self, searchterm: str) -> List[str]:
|
||||
response = self.search_book(searchterm)
|
||||
soup = BeautifulSoup(response, "html.parser")
|
||||
links = soup.find_all("a", class_="title getFull")
|
||||
res: List[str] = []
|
||||
for link in links:
|
||||
res.append(BASE + link["href"]) # type: ignore
|
||||
return res
|
||||
|
||||
def get_book(self, searchterm: str):
|
||||
log.info(f"Searching for term: {searchterm}")
|
||||
|
||||
links = self.get_book_links(searchterm)
|
||||
print(links)
|
||||
for elink in links:
|
||||
result = self.search(elink)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
|
||||
# Optional (unchanged): title and ppn if you need them
|
||||
title_el = soup.find("div", class_="headline text")
|
||||
title = title_el.get_text(strip=True) if title_el else None
|
||||
|
||||
ppn_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
|
||||
)
|
||||
# in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
ppn = (
|
||||
ppn_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).get_text(strip=True)
|
||||
if ppn_el
|
||||
else None
|
||||
)
|
||||
|
||||
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
|
||||
edition_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
|
||||
)
|
||||
edition = (
|
||||
edition_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).get_text(strip=True)
|
||||
if edition_el
|
||||
else None
|
||||
)
|
||||
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
)
|
||||
author = None
|
||||
if authors:
|
||||
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
for link in links:
|
||||
author_names.append(link.text.strip())
|
||||
author = (
|
||||
";".join(author_names) if len(author_names) > 1 else author_names[0]
|
||||
)
|
||||
signature = None
|
||||
|
||||
panel = soup.select_one("div.panel-body")
|
||||
if panel:
|
||||
# Collect the RDS_* blocks in order, using the 'space' divs as separators
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
if "space" in classes:
|
||||
if cur:
|
||||
groups.append(cur)
|
||||
cur = {}
|
||||
continue
|
||||
|
||||
# Read the value from the corresponding panel cell
|
||||
val_el = node.select_one(".rds-dl-panel")
|
||||
val = (
|
||||
val_el.get_text(" ", strip=True)
|
||||
if val_el
|
||||
else node.get_text(" ", strip=True)
|
||||
)
|
||||
|
||||
if "RDS_SIGNATURE" in classes:
|
||||
cur["signature"] = val
|
||||
elif "RDS_STATUS" in classes:
|
||||
cur["status"] = val
|
||||
elif "RDS_LOCATION" in classes:
|
||||
cur["location"] = val
|
||||
|
||||
if cur: # append the last group if not followed by a space
|
||||
groups.append(cur)
|
||||
|
||||
# Find the signature for the entry whose location mentions "Semesterapparat"
|
||||
for g in groups:
|
||||
loc = g.get("location", "").lower()
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("-")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
else:
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
|
||||
def get(self, ppn: str) -> Book | None:
|
||||
# based on PPN, get title, people, edition, year, language, pages, isbn,
|
||||
link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
|
||||
def get_ppn(self, searchterm: str) -> str | None:
|
||||
links = self.get_book_links(searchterm)
|
||||
ppn = None
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
print(link)
|
||||
ppn = link.split("/")[-1]
|
||||
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
|
||||
return ppn
|
||||
return ppn
|
||||
|
||||
def get_semesterapparat_number(self, searchterm: str) -> int:
|
||||
links = self.get_book_links(searchterm)
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
|
||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||
for location_el in locations:
|
||||
if "Semesterapparat-" in location_el.text:
|
||||
match = regex.search(r"Semesterapparat-(\d+)", location_el.text)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
if "Handbibliothek-" in location_el.text:
|
||||
return location_el.text.strip().split("\n\n")[-1].strip()
|
||||
return location_el.text.strip().split("\n\n")[-1].strip()
|
||||
return 0
|
||||
|
||||
def get_author(self, link: str) -> str:
|
||||
links = self.get_book_links(f"kid:{link}")
|
||||
author = None
|
||||
for link in links:
|
||||
# print(link)
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
# get all authors, return them as a string seperated by ;
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
)
|
||||
if authors:
|
||||
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
for link in links:
|
||||
author_names.append(link.text.strip())
|
||||
author = "; ".join(author_names)
|
||||
return author
|
||||
|
||||
def get_signature(self, isbn: str):
|
||||
links = self.get_book_links(f"{isbn}")
|
||||
signature = None
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
panel = soup.select_one("div.panel-body")
|
||||
if panel:
|
||||
# Collect the RDS_* blocks in order, using the 'space' divs as separators
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
if "space" in classes:
|
||||
if cur:
|
||||
groups.append(cur)
|
||||
cur = {}
|
||||
continue
|
||||
|
||||
# Read the value from the corresponding panel cell
|
||||
val_el = node.select_one(".rds-dl-panel")
|
||||
val = (
|
||||
val_el.get_text(" ", strip=True)
|
||||
if val_el
|
||||
else node.get_text(" ", strip=True)
|
||||
)
|
||||
|
||||
if "RDS_SIGNATURE" in classes:
|
||||
cur["signature"] = val
|
||||
elif "RDS_STATUS" in classes:
|
||||
cur["status"] = val
|
||||
elif "RDS_LOCATION" in classes:
|
||||
cur["location"] = val
|
||||
|
||||
if cur: # append the last group if not followed by a space
|
||||
groups.append(cur)
|
||||
|
||||
# Find the signature for the entry whose location mentions "Semesterapparat"
|
||||
for g in groups:
|
||||
print(g)
|
||||
loc = g.get("location", "").lower()
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
else:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
print("No signature found")
|
||||
return signature
|
||||
|
||||
def in_library(self, ppn: str) -> bool:
    """Tell whether the query ``kid:<ppn>`` yields at least one book link.

    A ``None`` *ppn* short-circuits to ``False`` without querying.
    """
    if ppn is None:
        return False
    links = self.get_book_links(f"kid:{ppn}")
    return len(links) > 0
|
||||
|
||||
def get_location(self, ppn: str) -> str | None:
|
||||
if ppn is None:
|
||||
return None
|
||||
link = self.get_book(f"{ppn}")
|
||||
if link is None:
|
||||
return None
|
||||
return link.library_location
|
||||
312
src/services/lehmanns.py
Normal file
312
src/services/lehmanns.py
Normal file
@@ -0,0 +1,312 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Iterable, List, Optional
|
||||
from urllib.parse import quote_plus, urljoin
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.core.models import BookData
|
||||
|
||||
# Site root; relative links and image paths from result pages are joined onto it.
BASE = "https://www.lehmanns.de"
# Quick-search endpoint; the URL-encoded search term is appended after "q=".
SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="
|
||||
|
||||
|
||||
@dataclass
class LehmannsSearchResult:
    """One hit from the Lehmanns listing page, optionally enriched from its detail page."""

    title: str
    url: str

    # Core fields from the listing card
    year: Optional[int] = None
    edition: Optional[int] = None
    publisher: Optional[str] = None
    isbn13: Optional[str] = None

    # Extras from the listing card
    description: Optional[str] = None
    authors: list[str] = field(default_factory=list)
    media_type: Optional[str] = None
    book_format: Optional[str] = None
    price_eur: Optional[float] = None
    currency: str = "EUR"
    image: Optional[str] = None

    # From detail page:
    pages: Optional[str] = None  # "<N> Seiten"
    buyable: bool = True  # set in enrich_pages (detail page)
    unavailable_hint: Optional[str] = None  # e.g. "Titel ist leider vergriffen; keine Neuauflage"

    def to_dict(self) -> dict:
        """Return all fields as a plain dictionary."""
        return asdict(self)
|
||||
|
||||
|
||||
class LehmannsClient:
    """Scrapes quick-search results, then enriches (and filters) via product pages."""

    # German-formatted amount followed by a euro sign, e.g. "1.234,56 €".
    _PRICE_RE = re.compile(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€")

    def __init__(self, timeout: float = 20.0):
        """Create the shared HTTP client with browser-like request headers."""
        self.client = httpx.Client(
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
                ),
                "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
            timeout=timeout,
            follow_redirects=True,
        )

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    # ------------------- Search (listing) -------------------

    def build_search_url(self, title: str) -> str:
        """Return the quick-search URL for *title* (spaces become '+')."""
        return SEARCH_URL + quote_plus(title)

    def search_by_title(
        self,
        title: str,
        limit: Optional[int] = None,
        strict: bool = False,
        only_latest: bool = True,
    ) -> List[BookData]:
        """
        Search the listing page for *title* and return the hits as BookData.

        The parsed listing entries are passed through ``enrich_pages`` (which
        fills ``pages`` / ``buyable`` in place) and then converted with
        ``BookData().from_LehmannsSearchResult``. The filtered list that
        ``enrich_pages`` returns is not used, so unbuyable entries stay in
        the result.

        Args:
            title: Search term.
            limit: Keep at most this many hits (negative values act like 0).
            strict: Keep only hits whose title equals *title*
                case-insensitively. In this mode the matches are returned
                immediately; ``limit`` and ``only_latest`` are not applied.
            only_latest: If more than one hit remains, keep only the one with
                the highest ``edition_number`` (missing numbers count as 0;
                the earliest hit wins ties).

        Returns:
            The matching books, or an empty list when the listing page could
            not be fetched.
        """
        url = self.build_search_url(title=title)
        html = self._get(url)
        if not html:
            return []
        results = self._parse_results(html)
        self.enrich_pages(results)

        books = [BookData().from_LehmannsSearchResult(r) for r in results]
        if strict:
            # filter results to only those with exact title match (case-insensitive)
            title_lower = title.lower()
            return [b for b in books if b.title and b.title.lower() == title_lower]
        if limit is not None:
            books = books[: max(0, limit)]
        if only_latest and len(books) > 1:
            # keep only the latest edition (highest edition number)
            books = [max(books, key=lambda b: (b.edition_number or 0))]
        return books

    # ------------------- Detail enrichment & filtering -------------------

    def enrich_pages(
        self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
    ) -> List[LehmannsSearchResult]:
        """
        Fetch each result.url, extract:
          - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
          - availability: from <li class="availability-3">...</li>
             * if it contains "Titel ist leider vergriffen", mark buyable=False
             * if it also contains "keine Neuauflage", set unavailable_hint accordingly
        If drop_unbuyable=True, exclude non-buyable results from the returned list.

        The result objects themselves are updated in place. Entries whose
        detail page cannot be fetched or parsed are only kept in the returned
        list when drop_unbuyable is False.
        """
        enriched: List[LehmannsSearchResult] = []
        for r in results:
            try:
                html = self._get(r.url)
                if not html:
                    # Can't verify; keep as-is when not dropping, else skip
                    if not drop_unbuyable:
                        enriched.append(r)
                    continue

                soup = BeautifulSoup(html, "html.parser")  # type: ignore

                # Pages
                pages_node = soup.select_one(  # type: ignore
                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
                    ".meta-seiten [itemprop='numberOfPages'], "
                    ".meta-seiten[itemprop='numberOfPages'], "
                    ".book-meta.meta-seiten"
                )
                if pages_node:
                    text = pages_node.get_text(" ", strip=True)
                    m = re.search(r"\d+", text)
                    if m:
                        r.pages = f"{m.group(0)} Seiten"

                # Availability via li.availability-3
                avail_li = soup.select_one("li.availability-3")  # type: ignore
                if avail_li:
                    # Collapse whitespace so the phrase checks are robust.
                    avail_text = " ".join(
                        avail_li.get_text(" ", strip=True).split()
                    ).lower()
                    if "titel ist leider vergriffen" in avail_text:
                        r.buyable = False
                        if "keine neuauflage" in avail_text:
                            r.unavailable_hint = (
                                "Titel ist leider vergriffen; keine Neuauflage"
                            )
                        else:
                            r.unavailable_hint = "Titel ist leider vergriffen"

                # Append or drop
                if (not drop_unbuyable) or r.buyable:
                    enriched.append(r)

            except Exception:
                # Deliberately best-effort: on any per-item error, keep the
                # record if not dropping; else skip.
                if not drop_unbuyable:
                    enriched.append(r)
                continue

        return enriched

    # ------------------- Internals -------------------

    def _get(self, url: str) -> Optional[str]:
        """Return the body of *url* as UTF-8 text, or None.

        None is returned for non-200 responses, non-HTML content types and
        any ``httpx.HTTPError``.
        """
        try:
            r = self.client.get(url)
            r.encoding = "utf-8"
            if r.status_code == 200 and "text/html" in (
                r.headers.get("content-type") or ""
            ):
                return r.text
        except httpx.HTTPError:
            pass
        return None

    def _parse_results(self, html: str) -> List[LehmannsSearchResult]:
        """Turn every ``div.info-block`` of a listing page into a search result.

        Blocks without a ``.title a[href]`` link are skipped.
        """
        soup = BeautifulSoup(html, "html.parser")
        results: list[LehmannsSearchResult] = []

        for block in soup.select("div.info-block"):
            a = block.select_one(".title a[href]")
            if not a:
                continue
            url = urljoin(BASE, a["href"].strip())
            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
                strip=True
            )

            # Alternative headline => extend title
            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
            title = (
                f"{base_title} : {alternative_headline}"
                if alternative_headline
                else base_title
            )
            description = alternative_headline

            # Authors from .author: drop a leading "von", split on ';', '&', 'und'
            authors: list[str] = []
            author_div = block.select_one("div.author")  # type: ignore
            if author_div:
                t = author_div.get_text(" ", strip=True)
                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
                for part in re.split(r"\s*;\s*|\s*&\s*|\s+und\s+", t):
                    name = " ".join(part.split())
                    if name:
                        authors.append(name)

            # Media + format
            media_type = None
            book_format = None
            type_text = block.select_one(".type")  # type: ignore
            if type_text:
                t = type_text.get_text(" ", strip=True)
                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
                if m:
                    media_type = m.group(1)
                fm = re.search(r"\(([^)]+)\)", t)
                if fm:
                    book_format = fm.group(1).strip().upper()

            # Year
            year = None
            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
            if y:
                try:
                    year = int(y.get_text(strip=True))
                except ValueError:
                    pass

            # Edition: first run of digits in the edition text
            edition = None
            ed = block.select_one("[itemprop='bookEdition']")  # type: ignore
            if ed:
                m = re.search(r"\d+", ed.get_text(strip=True))
                if m:
                    edition = int(m.group())

            # Publisher
            publisher = None
            pub = block.select_one(  # type: ignore
                ".publisherprop [itemprop='name']"
            ) or block.select_one(".publisher [itemprop='name']")  # type: ignore
            if pub:
                publisher = pub.get_text(strip=True)

            # ISBN-13: strip separators, then look for a 978/979-prefixed 13-digit run
            isbn13 = None
            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")  # type: ignore
            if isbn_tag:
                digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
                m = re.search(r"(97[89]\d{10})", digits)
                if m:
                    isbn13 = m.group(1)

            # Price (best effort): look in the block first, then in its parent
            price_eur = None
            txt = block.get_text(" ", strip=True)
            mprice = self._PRICE_RE.search(txt)
            if not mprice and block.parent:
                sib = block.parent.get_text(" ", strip=True)
                mprice = self._PRICE_RE.search(sib)
            if mprice:
                # "1.234,56" -> "1234.56"
                num = mprice.group(1).replace(".", "").replace(",", ".")
                try:
                    price_eur = float(num)
                except ValueError:
                    pass

            # Image (best-effort): nearest <img> preceding the block
            image = None
            left_img = block.find_previous("img")  # type: ignore
            if left_img and left_img.get("src"):
                image = urljoin(BASE, left_img["src"])

            results.append(
                LehmannsSearchResult(
                    title=title,
                    url=url,
                    description=description,
                    authors=authors,
                    media_type=media_type,
                    book_format=book_format,
                    year=year,
                    edition=edition,
                    publisher=publisher,
                    isbn13=isbn13,
                    price_eur=price_eur,
                    image=image,
                )
            )

        return results
|
||||
58
src/services/openai.py
Normal file
58
src/services/openai.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from src import settings
|
||||
|
||||
|
||||
def init_client() -> OpenAI:
    """Initialize the OpenAI client with the API key and model from settings.

    Stores the client, model name and API key in the module globals
    ``client``, ``model`` and ``api_key``.

    Raises:
        ValueError: If the API key or the model is not configured.
    """
    global client, model, api_key
    if not settings.openAI.api_key:
        raise ValueError("OpenAI API key is not set in the configuration.")
    if not settings.openAI.model:
        raise ValueError("OpenAI model is not set in the configuration.")

    model = settings.openAI.model
    api_key = settings.openAI.api_key
    client = OpenAI(api_key=api_key)
    return client
|
||||
|
||||
|
||||
def run_shortener(title: str, length: int) -> list[dict[str, Any]]:
    """Ask the model for shortened variants of *title* that fit within *length*.

    The request payload is serialized with ``json.dumps`` so that quotes or
    backslashes inside *title* cannot break the JSON sent to the model.

    Returns:
        Whatever the model's text output evaluates to; the prompt asks for a
        list of ``{"shortened_string": ..., "length": ...}`` dictionaries.
    """
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit.
You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit.
based on that, please reply only the shortened string. Give me 5 choices. if the length is too long, discard the string and try another one.Return the data as a python list containing the result as {"shortened_string": shortened_string, "length": lengthasInt}. Do not return the answer in a codeblock, use a pure string. Before answering, check the results and if ANY is longer than the needed_length, discard all and try again""",
        input=json.dumps(
            {"string": title, "needed_length": length},
            ensure_ascii=False,
            separators=(", ", ":"),
        ),
    )
    answers = response.output_text
    # SECURITY(review): eval() executes text produced by an external service.
    # The prompt requests a Python list literal, so a literal-only parser
    # (ast.literal_eval) or a JSON response format would be the safe choice.
    return eval(answers)  # type: ignore
|
||||
|
||||
|
||||
def name_tester(name: str) -> dict:
    """Ask the model to split *name* into title, first name and last name.

    The request payload is serialized with ``json.dumps`` so that quotes or
    backslashes inside *name* cannot break the JSON sent to the model.

    Returns:
        The model's reply parsed as JSON; the prompt asks for the keys
        "title", "first_name" and "last_name".

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a name tester, You are given a name and will have to split the name into first name, last name, and if present the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOt return the answer in a codeblock, use a pure json string. Assume the names are in the usual german naming scheme""",
        input=json.dumps({"name": name}, ensure_ascii=False, separators=(",", ":")),
    )
    answers = response.output_text

    return json.loads(answers)
|
||||
|
||||
|
||||
def semester_converter(semester: str) -> str:
    """Ask the model to rewrite *semester* as "SoSe YY" or "WiSe YY/YY+1".

    Returns:
        The model's text output, unmodified.
    """
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""",
        input=semester,
    )
    return response.output_text
|
||||
631
src/services/sru.py
Normal file
631
src/services/sru.py
Normal file
@@ -0,0 +1,631 @@
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
# centralized logging used via src.shared.logging
|
||||
from src.core.models import BookData
|
||||
from src.shared.logging import log
|
||||
|
||||
log # ensure imported logger is referenced
|
||||
|
||||
|
||||
# -----------------------
|
||||
# Dataclasses
|
||||
# -----------------------
|
||||
|
||||
|
||||
# --- MARC XML structures ---
|
||||
@dataclass
class ControlField:
    """A MARC XML ``controlfield``: a tag and its text value."""

    tag: str
    value: str
|
||||
|
||||
|
||||
@dataclass
class SubField:
    """A MARC XML ``subfield``: a code and its text value."""

    code: str
    value: str
|
||||
|
||||
|
||||
@dataclass
class DataField:
    """A MARC XML ``datafield``: tag, two indicators and its subfields."""

    tag: str
    ind1: str = " "  # a single blank stands for "indicator not set"
    ind2: str = " "
    subfields: List[SubField] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class MarcRecord:
    """A parsed MARC record: leader plus control and data fields."""

    leader: str
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- SRU record wrapper ---
|
||||
@dataclass
class Record:
    """One SRU ``record`` element wrapping a parsed MARC record."""

    recordSchema: str
    recordPacking: str
    recordData: MarcRecord
    recordPosition: int
|
||||
|
||||
|
||||
@dataclass
class EchoedSearchRequest:
    """The request parameters echoed back in an SRU response."""

    version: str
    query: str
    maximumRecords: int
    recordPacking: str
    recordSchema: str
|
||||
|
||||
|
||||
@dataclass
class SearchRetrieveResponse:
    """A parsed SRU ``searchRetrieveResponse`` document."""

    version: str
    numberOfRecords: int
    records: List[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
|
||||
|
||||
|
||||
# -----------------------
|
||||
# Parser
|
||||
# -----------------------
|
||||
|
||||
# XML namespace URIs looked up under the "zs" and "marc" prefixes.
ZS = "http://www.loc.gov/zing/srw/"
MARC = "http://www.loc.gov/MARC21/slim"
# Prefix map handed to ElementTree find()/findall() so paths can be written
# as "zs:..." / "marc:...".
NS = {"zs": ZS, "marc": MARC}
|
||||
|
||||
|
||||
def _text(elem: Optional[ET.Element]) -> str:
    """Return the element's text, or "" when the element or its text is missing."""
    if elem is None:
        return ""
    return elem.text or ""
|
||||
|
||||
|
||||
def _req_text(parent: ET.Element, path: str) -> Optional[str]:
    """Return the text of the first child matching *path*, or None.

    *path* is resolved with the module's ``NS`` prefix map. None is returned
    both when no element matches and when the element has no text.
    """
    el = parent.find(path, NS)
    if el is None:
        return None
    return el.text
|
||||
|
||||
|
||||
def parse_marc_record(record_el: ET.Element) -> MarcRecord:
    """
    Build a MarcRecord from a <marc:record> element.

    Reads the leader ("" when absent), every controlfield and every
    datafield with its subfields. Tags are stripped of surrounding
    whitespace; missing or empty indicators become a single blank.
    """
    # leader
    leader_text = _req_text(record_el, "marc:leader") or ""

    # controlfields
    controlfields: List[ControlField] = [
        ControlField(tag=cf.get("tag", "").strip(), value=_text(cf))
        for cf in record_el.findall("marc:controlfield", NS)
    ]

    # datafields
    datafields: List[DataField] = []
    for df in record_el.findall("marc:datafield", NS):
        subfields: List[SubField] = [
            SubField(code=sf.get("code", ""), value=_text(sf))
            for sf in df.findall("marc:subfield", NS)
        ]
        datafields.append(
            DataField(
                tag=df.get("tag", "").strip(),
                ind1=df.get("ind1") or " ",
                ind2=df.get("ind2") or " ",
                subfields=subfields,
            )
        )

    return MarcRecord(
        leader=leader_text, controlfields=controlfields, datafields=datafields
    )
|
||||
|
||||
|
||||
def parse_record(zs_record_el: ET.Element) -> Record:
    """Build a Record from a <zs:record> element.

    Missing schema/packing values become "", a missing position becomes 0.

    Raises:
        ValueError: If <zs:recordData> or the MARC record inside it is
            missing, or if the record position is not an integer.
    """
    recordSchema = _req_text(zs_record_el, "zs:recordSchema") or ""
    recordPacking = _req_text(zs_record_el, "zs:recordPacking") or ""

    recordData_el = zs_record_el.find("zs:recordData", NS)
    if recordData_el is None:
        raise ValueError("Missing zs:recordData")

    # ElementTree needs the namespace-qualified name even when the MARC record
    # declares a default namespace (xmlns="..."), so this one prefixed lookup
    # covers both the default and the prefixed form.
    marc_record_el = recordData_el.find("marc:record", NS)
    if marc_record_el is None:
        raise ValueError("Missing MARC21 record inside zs:recordData")

    marc_record = parse_marc_record(marc_record_el)

    recordPosition = int(_req_text(zs_record_el, "zs:recordPosition") or "0")
    return Record(
        recordSchema=recordSchema,
        recordPacking=recordPacking,
        recordData=marc_record,
        recordPosition=recordPosition,
    )
|
||||
|
||||
|
||||
def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
    """Parse <zs:echoedSearchRetrieveRequest> below *root*, if present.

    Returns None when the element is absent. Missing child elements yield
    "" and an absent or non-numeric maximumRecords yields 0.
    """
    el = root.find("zs:echoedSearchRetrieveRequest", NS)
    if el is None:
        return None

    # Be permissive with missing fields
    try:
        maximumRecords = int(_text(el.find("zs:maximumRecords", NS)) or "0")
    except ValueError:
        maximumRecords = 0

    return EchoedSearchRequest(
        version=_text(el.find("zs:version", NS)),
        query=_text(el.find("zs:query", NS)),
        maximumRecords=maximumRecords,
        recordPacking=_text(el.find("zs:recordPacking", NS)),
        recordSchema=_text(el.find("zs:recordSchema", NS)),
    )
|
||||
|
||||
|
||||
def parse_search_retrieve_response(
    xml_str: Union[str, bytes],
) -> SearchRetrieveResponse:
    """Parse a complete SRU searchRetrieveResponse document.

    A missing version becomes "" (the field is declared ``str``) and a
    missing numberOfRecords becomes 0.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_str* is not well-formed XML.
        ValueError: If numberOfRecords is not an integer, or a contained
            record fails to parse.
    """
    root = ET.fromstring(xml_str)

    # Root is zs:searchRetrieveResponse
    version = _req_text(root, "zs:version") or ""
    numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")

    records: List[Record] = []
    records_parent = root.find("zs:records", NS)
    if records_parent is not None:
        records = [parse_record(r) for r in records_parent.findall("zs:record", NS)]

    return SearchRetrieveResponse(
        version=version,
        numberOfRecords=numberOfRecords,
        records=records,
        echoedSearchRetrieveRequest=parse_echoed_request(root),
    )
|
||||
|
||||
|
||||
# --- Query helpers over MarcRecord ---
|
||||
|
||||
|
||||
def iter_datafields(
    rec: MarcRecord,
    tag: Optional[str] = None,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> Iterable[DataField]:
    """Yield datafields, optionally filtered by tag/indicators.

    A filter left at None matches everything; given filters must all match
    exactly. Fields are yielded in record order.
    """
    for df in rec.datafields:
        if tag is not None and df.tag != tag:
            continue
        if ind1 is not None and df.ind1 != ind1:
            continue
        if ind2 is not None and df.ind2 != ind2:
            continue
        yield df
|
||||
|
||||
|
||||
def subfield_values(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[str]:
    """All values for subfield `code` in every `tag` field (respecting indicators).

    Values are returned in record order; the list is empty when nothing matches.
    """
    return [
        sf.value
        for df in iter_datafields(rec, tag, ind1, ind2)
        for sf in df.subfields
        if sf.code == code
    ]
|
||||
|
||||
|
||||
def first_subfield_value(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """First value for subfield `code` in `tag` (respecting indicators).

    Returns *default* when no matching subfield exists.
    """
    for df in iter_datafields(rec, tag, ind1, ind2):
        for sf in df.subfields:
            if sf.code == code:
                return sf.value
    return default
|
||||
|
||||
|
||||
def find_datafields_with_subfields(
    rec: MarcRecord,
    tag: str,
    *,
    where_all: Optional[Dict[str, str]] = None,
    where_any: Optional[Dict[str, str]] = None,
    casefold: bool = False,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[DataField]:
    """
    Return datafields of `tag` whose subfields match constraints:
      - where_all: every (code -> exact value) must be present
      - where_any: at least one (code -> exact value) present
    Set `casefold=True` for case-insensitive comparison.

    Omitted or empty constraint dicts impose no restriction.
    """
    where_all = where_all or {}
    where_any = where_any or {}

    def fold(value: str) -> str:
        return value.casefold() if casefold else value

    matched: List[DataField] = []
    for df in iter_datafields(rec, tag, ind1, ind2):
        # Map code -> list of values (with optional casefold applied)
        vals: Dict[str, List[str]] = {}
        for sf in df.subfields:
            vals.setdefault(sf.code, []).append(fold(sf.value))

        def has(code: str, value: str) -> bool:
            return fold(value) in vals.get(code, ())

        if not all(has(c, v) for c, v in where_all.items()):
            continue
        if where_any and not any(has(c, v) for c, v in where_any.items()):
            continue
        matched.append(df)

    return matched
|
||||
|
||||
|
||||
def controlfield_value(
    rec: MarcRecord, tag: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first controlfield value by tag (e.g., '001', '005').

    Returns *default* when the record has no controlfield with that tag.
    """
    return next((cf.value for cf in rec.controlfields if cf.tag == tag), default)
|
||||
|
||||
|
||||
def datafields_value(
    data: List[DataField], code: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first value for a specific subfield code in a list of datafields.

    Fields and their subfields are searched in order; *default* is returned
    when no subfield carries *code*.
    """
    return next(
        (sf.value for df in data for sf in df.subfields if sf.code == code),
        default,
    )
|
||||
|
||||
|
||||
def datafield_value(
    df: DataField, code: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first value for a specific subfield code in a datafield.

    Returns *default* when the field has no subfield with *code*.
    """
    return next((sf.value for sf in df.subfields if sf.code == code), default)
|
||||
|
||||
|
||||
def _smart_join_title(a: str, b: Optional[str]) -> str:
    """
    Join 245 $a and $b with MARC-style punctuation.

    Both parts are stripped. Without $b (None or empty) only $a is returned.
    Otherwise the parts are joined with ' : ', unless $a already ends with,
    or $b already starts with, one of ':', ';' or '/', in which case a single
    space is used.
    """
    a = a.strip()
    if not b:
        return a
    b = b.strip()
    separators = (":", ";", "/")
    if a.endswith(separators) or b.startswith(separators):
        return f"{a} {b}"
    return f"{a} : {b}"
|
||||
|
||||
|
||||
def subfield_values_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[str]:
    """All subfield values with given `code` across a list of DataField.

    Values keep field order, then subfield order.
    """
    values: List[str] = []
    for df in fields:
        values.extend(sf.value for sf in df.subfields if sf.code == code)
    return values
|
||||
|
||||
|
||||
def first_subfield_value_from_fields(
    fields: Iterable[DataField],
    code: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """First subfield value with given `code` across a list of DataField.

    Returns *default* when no subfield carries *code*.
    """
    return next(
        (sf.value for df in fields for sf in df.subfields if sf.code == code),
        default,
    )
|
||||
|
||||
|
||||
def subfield_value_pairs_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[Tuple[DataField, str]]:
    """
    Return (DataField, value) pairs for all subfields with `code`.

    Useful if you need to know which field a value came from. Pairs keep
    field order, then subfield order.
    """
    return [(df, sf.value) for df in fields for sf in df.subfields if sf.code == code]
|
||||
|
||||
|
||||
def book_from_marc(rec: MarcRecord) -> BookData:
    """Map a MarcRecord onto a BookData instance.

    Field mapping (tag $subfield):
      - ppn: controlfield 001
      - title: 245 $a joined with 245 $b; None when $a is missing
      - signature: $g of the first 924 field whose $9 equals "Frei 129"
      - year: 264 $c, preferring fields with ind2 == "1"
      - edition / pages / publisher: 250 $a / 300 $a / 264 $b ("" if missing)
      - isbn: all 020 $a values; language: all 041 $a values
      - media_type: 338 $a
      - author: all 700 $a values joined with "; ", None when there are none
    """
    # PPN from controlfield 001
    ppn = controlfield_value(rec, "001")

    # Title = 245 $a + 245 $b (if present)
    t_a = first_subfield_value(rec, "245", "a")
    t_b = first_subfield_value(rec, "245", "b")
    title = _smart_join_title(t_a, t_b) if t_a else None

    # Signature = 924 where $9 == "Frei 129" → take that field's $g
    frei_fields = find_datafields_with_subfields(
        rec, "924", where_all={"9": "Frei 129"}
    )
    signature = first_subfield_value_from_fields(frei_fields, "g")

    # Year = 264 $c (prefer ind2="1" publication; fallback to any 264)
    year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value(
        rec, "264", "c"
    )

    # NOTE(review): only 700 $a is read for authors; a name stored in 100 $a
    # would not appear here — confirm this is intended.
    authors = subfield_values(rec, "700", "a")
    author = "; ".join(authors) if authors else None

    return BookData(
        ppn=ppn,
        title=title,
        signature=signature,
        edition=first_subfield_value(rec, "250", "a") or "",
        year=year,
        pages=first_subfield_value(rec, "300", "a") or "",
        publisher=first_subfield_value(rec, "264", "b") or "",
        isbn=subfield_values(rec, "020", "a"),
        language=subfield_values(rec, "041", "a"),
        link="",
        author=author,
        media_type=first_subfield_value(rec, "338", "a"),
    )
|
||||
|
||||
|
||||
class SWBData(Enum):
    """Connection settings for the SWB SRU endpoint."""

    # URL template; "{}" is the placeholder for the query string.
    URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"
    # Prefix of this site's query arguments.
    ARGSCHEMA = "pica."
    NAME = "SWB"
|
||||
|
||||
|
||||
class DNBData(Enum):
    """Connection settings for the DNB SRU endpoint."""

    # URL template; "{}" is the placeholder for the query string.
    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"
    # No argument prefix is configured for this site.
    ARGSCHEMA = ""
    NAME = "DNB"
|
||||
|
||||
|
||||
class SRUSite(Enum):
    """Supported SRU sites; each member's value is that site's settings enum."""

    SWB = SWBData
    DNB = DNBData
|
||||
|
||||
|
||||
RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative char set typically seen in RVK notations
|
||||
|
||||
|
||||
def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    Pick the newest edition from *dnb_result* that is newer than *swb_result*.

    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    (a comparison only counts when both sides are not None).

    Additional guards & preferences:
      - If both have signatures and they differ, skip (not the same work).
      - For duplicates (same ppn): keep the one that has a signature, and
        prefer a signature that matches swb_result.signature. Records without
        a ppn are never treated as duplicates of each other.
      - If multiple remain: keep the single 'latest' by (year desc,
        edition_number desc, best-signature-match desc, has-signature desc).

    Returns:
        A one-element list with the best candidate, or None if there is none.
    """

    def norm_sig(s: Optional[str]) -> str:
        if not s:
            return ""
        # normalize: lowercase, collapse whitespace, keep alnum + a few separators
        s = s.lower()
        s = re.sub(r"\s+", " ", s).strip()
        # remove obvious noise; adjust if your signature format differs
        s = re.sub(r"[^a-z0-9\-_/\. ]+", "", s)
        return s

    def has_sig(b: BookData) -> bool:
        return bool(getattr(b, "signature", None))

    def sig_matches_swb(b: BookData) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return norm_sig(b.signature) == norm_sig(swb_result.signature)

    def strictly_newer(b: BookData) -> bool:
        by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        return by_year or by_edition

    def ppn_pref_score(x: BookData) -> tuple[int, int]:
        # (signature matches swb, has signature)
        return (1 if sig_matches_swb(x) else 0, 1 if has_sig(x) else 0)

    def sort_key(b: BookData):
        # Order: year desc, edition_number desc, signature-match desc, has-signature desc
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        return (year, ed, *ppn_pref_score(b))

    swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))

    # 1) Filter to same-work AND newer
    candidates: List[BookData] = []
    for b in dnb_result:
        # Skip if both signatures exist and don't match (different work)
        b_sig = getattr(b, "signature", None)
        if b_sig and swb_result.signature and norm_sig(b_sig) != swb_sig_norm:
            continue
        # Keep only if newer by rules
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
    by_ppn: dict[object, BookData] = {}
    for index, b in enumerate(candidates):
        ppn = getattr(b, "ppn", None)
        # A missing PPN says nothing about identity, so such records get a
        # key of their own instead of all collapsing under None (which could
        # discard the newest one before step 3).
        key = ppn if ppn is not None else ("no-ppn", index)
        prev = by_ppn.get(key)
        if prev is None or ppn_pref_score(b) > ppn_pref_score(prev):
            by_ppn[key] = b

    deduped = list(by_ppn.values())
    if not deduped:
        return None

    # 3) If multiple remain, keep only the latest one.
    best = max(deduped, key=sort_key)
    return [best] if best else None
|
||||
|
||||
|
||||
class Api:
    """Small SRU client bound to one catalogue endpoint.

    Keeps a pooled ``requests.Session`` and turns a list of query terms into
    one SRU request whose XML response is parsed into records.
    """

    def __init__(self, site: str, url: str, prefix: str):
        """Store the endpoint description and set up a pooled HTTP session.

        Args:
            site: Catalogue name. ``"DNB"`` enables the DNB-specific
                argument filtering in :meth:`get`.
            url: URL template with one ``{}`` placeholder for the query.
            prefix: Argument schema prefix (stored, not applied here).
        """
        self.site = site
        self.url = url
        self.prefix = prefix
        # Reuse TCP connections across requests for better performance
        self._session = requests.Session()
        # Slightly larger connection pool for concurrent calls
        adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

    def close(self):
        """Close the HTTP session; never raises."""
        try:
            self._session.close()
        except Exception:
            # Deliberately best-effort: this also runs from __del__, possibly
            # on a half-initialised instance or during interpreter shutdown.
            pass

    def __del__(self):
        # Best-effort cleanup
        self.close()

    def get(self, query_args: Iterable[str]) -> List[Record]:
        """Run one SRU query and return the parsed records.

        Args:
            query_args: Query terms; they are joined with ``+and+``.

        Raises:
            ValueError: For DNB when no term is left after dropping the
                ``pica.``-prefixed arguments.
            Exception: When the server answers with a non-200 status.
        """
        query_args = list(query_args)
        if self.site == "DNB":
            # DNB queries are sent without the pica.* terms
            query_args = [arg for arg in query_args if not arg.startswith("pica.")]
            if not query_args:
                raise ValueError("DNB queries must include at least one search term")
        query = "+and+".join(query_args)
        query = query.replace(" ", "%20").replace("&", "%26")
        url = self.url.format(query)

        log.debug(url)
        headers = {
            "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
            "Accept": "application/xml",
            "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
        }
        # Use persistent session and set timeouts to avoid hanging
        resp = self._session.get(url, headers=headers, timeout=(3.05, 60))
        if resp.status_code != 200:
            # Name the catalogue actually queried (was hard-coded to "SWB")
            raise Exception(
                f"Error fetching data from {self.site}: {resp.status_code}"
            )
        # Parse using raw bytes (original behavior) to preserve encoding edge cases
        sr = parse_search_retrieve_response(resp.content)
        return sr.records

    def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
        """Query the catalogue and convert every record to ``BookData``.

        If a ``pica.tit=<title>`` term is present, only books whose title
        starts with ``<title>`` (case-insensitive) are returned.
        """
        # Materialise once: the terms are read by get() and again below,
        # which would silently lose the title filter for a generator.
        query_args = list(query_args)
        records: List[Record] = self.get(query_args)
        # Avoid printing on hot paths; rely on logger if needed
        log.debug(f"{self.site} found {len(records)} records for args={query_args}")

        # extract title from query_args if present
        title = None
        for arg in query_args:
            if arg.startswith("pica.tit="):
                # maxsplit=1 keeps titles that themselves contain "="
                title = arg.split("=", 1)[1]
                break

        books: List[BookData] = [book_from_marc(rec.recordData) for rec in records]
        if title:
            wanted = title.lower()
            books = [
                b for b in books if b.title and b.title.lower().startswith(wanted)
            ]
        return books

    def getLinkForBook(self, book: BookData) -> str:
        """Return a catalogue link for *book*; currently always ``""``."""
        # Not implemented: depends on catalog front-end; return empty string for now
        return ""
|
||||
|
||||
|
||||
class SWB(Api):
    """``Api`` client preconfigured with the SWB values from ``SWBData``."""

    def __init__(self):
        # Api.__init__ stores site/url/prefix on the instance and builds the
        # HTTP session, so the constants are handed over directly.
        super().__init__(
            SWBData.NAME.value,
            SWBData.URL.value,
            SWBData.ARGSCHEMA.value,
        )
|
||||
35
src/services/webadis.py
Normal file
35
src/services/webadis.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
|
||||
def get_book_medianr(
    signature: str, semesterapparat_nr: int, auth: tuple
) -> "str | None":
    """Look up the media number of a book copy via the aDIS web client.

    Logs in with a headless Chromium, searches the catalogue by signature and
    scans the result cell for the copy assigned to the given Semesterapparat.

    Args:
        signature: Signature typed into the "Signatur" search field.
        semesterapparat_nr: Number matched as ``Semesterapparat: <nr>``.
        auth: ``(user, password)`` filled into "Benutzer" / "Kennwort".

    Returns:
        The text between ``"* "`` and the first ``":"`` of the matching
        entry, or ``None`` when no entry names that Semesterapparat.
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            try:
                page = context.new_page()
                page.goto(
                    "https://bsz.ibs-bw.de:22998/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SDAP42"
                )
                page.get_by_role("textbox", name="Benutzer").fill(auth[0])
                page.get_by_role("textbox", name="Benutzer").press("Tab")
                page.get_by_role("textbox", name="Kennwort").fill(auth[1])
                page.get_by_role("textbox", name="Kennwort").press("Enter")
                page.get_by_role("button", name="Katalog").click()
                page.get_by_role("textbox", name="Signatur").click()
                page.get_by_role("textbox", name="Signatur").fill(signature)
                page.get_by_role("textbox", name="Signatur").press("Enter")
                book_list = page.locator("iframe").content_frame.get_by_role(
                    "cell", name="Bibliothek der Pädagogischen"
                )
                # this will always find one result, we need to split the
                # resulting text based on the entries that start with "* "
                marker = f"Semesterapparat: {semesterapparat_nr}"
                for entry in book_list.inner_text().split("\n"):
                    if entry.startswith("* ") and marker in entry:
                        return entry.split("* ")[1].split(":")[0]
                return None
            finally:
                # Runs on the early return and on errors too; previously the
                # close calls were skipped whenever a match was found.
                context.close()
        finally:
            browser.close()
|
||||
314
src/services/webrequest.py
Normal file
314
src/services/webrequest.py
Normal file
@@ -0,0 +1,314 @@
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# import sleep_and_retry decorator to retry requests
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
|
||||
from src.core.models import BookData
|
||||
from src.shared.logging import log
|
||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
|
||||
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||
|
||||
|
||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
#
|
||||
TITLE = "RDS_TITLE"
|
||||
SIGNATURE = "RDS_SIGNATURE"
|
||||
EDITION = "RDS_EDITION"
|
||||
ISBN = "RDS_ISBN"
|
||||
AUTHOR = "RDS_PERSON"
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
|
||||
(HTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
|
||||
"Accept-Language": "en-US, en;q=0.5",
|
||||
}
|
||||
RATE_LIMIT = 20
|
||||
RATE_PERIOD = 30
|
||||
|
||||
|
||||
class TransformerType(Enum):
    """Export formats understood by ``BibTextTransformer``.

    Each member's value is the mode string the transformer compares against.
    """

    ARRAY = "ARRAY"
    COinS = "COinS"
    BibTeX = "BibTeX"
    RIS = "RIS"
    RDS = "RDS"
|
||||
|
||||
|
||||
class WebRequest:
    """Scrape the RDS OPAC for a signature and collect its export blocks.

    Intended for fluent use, e.g.
    ``WebRequest().set_apparat(71).get_ppn(signature).get_data()``.
    """

    def __init__(self) -> None:
        """Request data from the web, and format it depending on the mode."""
        self.apparat = None
        self.use_any = False  # use any book that matches the search term
        self.signature = None
        self.ppn = None
        self.data = None
        self.timeout = 5  # seconds, passed to every requests.get call
        log.info("Initialized WebRequest")

    @property
    def use_any_book(self):
        """use any book that matches the search term"""
        # NOTE: a property with a side effect; merely reading it flips the
        # flag. Kept as a property because callers chain on it.
        self.use_any = True
        log.info("Using any book")
        return self

    def set_apparat(self, apparat: int) -> "WebRequest":
        """Set the Semesterapparat number; values below 10 get a leading ``0``."""
        self.apparat = apparat
        if int(self.apparat) < 10:
            self.apparat = f"0{self.apparat}"
        log.info(f"Set apparat to {self.apparat}")
        return self

    def get_ppn(self, signature: str) -> "WebRequest":
        """Remember *signature* and derive the search term stored in ``self.ppn``.

        ``+`` is percent-encoded, and for ``doi.org`` links only the last
        path segment is kept.
        """
        self.signature = signature
        if "+" in signature:
            signature = signature.replace("+", "%2B")
        if "doi.org" in signature:
            signature = signature.split("/")[-1]
        self.ppn = signature
        return self

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_book(self, searchterm: str) -> str:
        """Return the HTML of the OPAC signature search for *searchterm*."""
        response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
        return response.text

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_ppn(self, ppn: str) -> str:
        """Return the HTML of the OPAC record page for *ppn*."""
        response = requests.get(API_URL.format(ppn), timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list[str]:
        """Return absolute URLs of all hits on the search result page."""
        response: str = self.search_book(searchterm)  # type:ignore
        soup = BeautifulSoup(response, "html.parser")
        return [
            BASE + link["href"] for link in soup.find_all("a", class_="title getFull")
        ]

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search(self, link: str) -> Optional[str]:
        """Fetch *link*; return its HTML, or ``None`` if the request failed."""
        try:
            response = requests.get(link, timeout=self.timeout)
            return response.text
        except requests.exceptions.RequestException as e:
            log.error(f"Request failed: {e}")
            return None

    @staticmethod
    def _collect_pre(soup, required: bool = False) -> list[str]:
        """Return the stripped text of every ``<pre>`` tag in *soup*.

        Logs an error when there is none and, if *required*, raises
        ``ValueError`` instead of returning an empty list.
        """
        texts = [tag.text.strip() for tag in soup.find_all("pre")]
        if not texts:
            log.error("No <pre> tag found")
            if required:
                raise ValueError("No <pre> tag found")
        return texts

    def get_data(self) -> Optional[list[str]]:
        """Return the export blocks of the first hit located in the apparat.

        A hit is accepted when one of its location entries mentions
        "1. OG Semesterapparat", when ``use_any`` is set, or when the entry's
        panel text contains ``Semesterapparat-<apparat>``. The result is the
        list of ``<pre>`` texts of that page; it is empty when the accepted
        page has none or when no hit was accepted.

        Raises:
            ValueError: In ``use_any`` mode when the page has no ``<pre>`` tag.
        """
        links = self.get_book_links(self.ppn)
        log.debug(f"Links: {links}")
        for link in links:
            result = self.search(link)
            if result is None:
                # search() already logged the failure; BeautifulSoup(None)
                # would raise, so move on to the next hit instead.
                continue
            # in result search for class col-xs-12 rds-dl RDS_LOCATION
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            for location in locations:
                if "1. OG Semesterapparat" in location.text:
                    log.success("Found Semesterapparat, adding entry")
                    return self._collect_pre(soup)

                panel = location.find(
                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                )
                # A location entry without the panel div is treated as an
                # empty location rather than crashing on None.text.
                item_location = panel.text.strip() if panel is not None else ""
                log.debug(f"Item location: {item_location}")
                if self.use_any:
                    return self._collect_pre(soup, required=True)
                if f"Semesterapparat-{self.apparat}" in item_location:
                    return self._collect_pre(soup)
                log.error(f"Signature {self.signature} not found in {item_location}")

        return []

    def get_data_elsa(self) -> Optional[list[str]]:
        """Return the ``<pre>`` texts of the first hit that has any location.

        Unlike :meth:`get_data`, the location text is not inspected. Returns
        ``None`` when no hit has a location entry.
        """
        for link in self.get_book_links(self.ppn):
            result = self.search(link)
            if result is None:
                continue  # failed request, already logged by search()
            soup = BeautifulSoup(result, "html.parser")
            if soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION"):
                return self._collect_pre(soup)
        return None
|
||||
|
||||
|
||||
class BibTextTransformer:
|
||||
"""Transforms data from the web into a BibText format.
|
||||
Valid Modes are ARRAY, COinS, BibTeX, RIS, RDS
|
||||
Raises:
|
||||
ValueError: Raised if mode is not in valid_modes
|
||||
"""
|
||||
|
||||
valid_modes = [
|
||||
TransformerType.ARRAY,
|
||||
TransformerType.COinS,
|
||||
TransformerType.BibTeX,
|
||||
TransformerType.RIS,
|
||||
TransformerType.RDS,
|
||||
]
|
||||
|
||||
def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
|
||||
self.mode = mode.value
|
||||
self.field = None
|
||||
self.signature = None
|
||||
if mode not in self.valid_modes:
|
||||
log.error(f"Mode {mode} not valid")
|
||||
raise ValueError(f"Mode {mode} not valid")
|
||||
self.data = None
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
def use_signature(self, signature: str) -> "BibTextTransformer":
|
||||
"""use the exact signature to search for the book"""
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer":
|
||||
RIS_IDENT = "TY -"
|
||||
ARRAY_IDENT = "[kid]"
|
||||
COinS_IDENT = "ctx_ver"
|
||||
BIBTEX_IDENT = "@book"
|
||||
RDS_IDENT = "RDS ---------------------------------- "
|
||||
|
||||
if data is None:
|
||||
self.data = None
|
||||
return self
|
||||
|
||||
if self.mode == "RIS":
|
||||
for line in data:
|
||||
if RIS_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "ARRAY":
|
||||
for line in data:
|
||||
if ARRAY_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "COinS":
|
||||
for line in data:
|
||||
if COinS_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "BibTeX":
|
||||
for line in data:
|
||||
if BIBTEX_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "RDS":
|
||||
for line in data:
|
||||
if RDS_IDENT in line:
|
||||
self.data = line
|
||||
return self
|
||||
|
||||
def return_data(
|
||||
self, option: Any = None
|
||||
) -> Union[
|
||||
Optional[BookData],
|
||||
Optional[RDS_GENERIC_DATA],
|
||||
Optional[RDS_AVAIL_DATA],
|
||||
None,
|
||||
dict[str, Union[RDS_AVAIL_DATA, RDS_GENERIC_DATA]],
|
||||
]:
|
||||
"""Return Data to caller.
|
||||
|
||||
Args:
|
||||
option (string, optional): Option for RDS as there are two filetypes. Use rds_availability or rds_data. Anything else gives a dict of both responses. Defaults to None.
|
||||
|
||||
Returns:
|
||||
BookData: a dataclass containing data about the book
|
||||
"""
|
||||
if self.data is None:
|
||||
return None
|
||||
match self.mode:
|
||||
case "ARRAY":
|
||||
return ARRAYData(self.signature).transform(self.data)
|
||||
case "COinS":
|
||||
return COinSData().transform(self.data)
|
||||
case "BibTeX":
|
||||
return BibTeXData().transform(self.data)
|
||||
case "RIS":
|
||||
return RISData().transform(self.data)
|
||||
case "RDS":
|
||||
return RDSData().transform(self.data).return_data(option)
|
||||
case _:
|
||||
return None
|
||||
|
||||
# if self.mode == "ARRAY":
|
||||
# return ARRAYData().transform(self.data)
|
||||
# elif self.mode == "COinS":
|
||||
# return COinSData().transform(self.data)
|
||||
# elif self.mode == "BibTeX":
|
||||
# return BibTeXData().transform(self.data)
|
||||
# elif self.mode == "RIS":
|
||||
# return RISData().transform(self.data)
|
||||
# elif self.mode == "RDS":
|
||||
# return RDSData().transform(self.data).return_data(option)
|
||||
|
||||
|
||||
def cover(isbn, timeout: float = 10):
    """Download the medium-size cover image for *isbn* from buchhandel.de.

    Args:
        isbn: ISBN inserted twice into the cover URL.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The raw response body as bytes. The HTTP status is not checked, so
        the bytes are whatever the server answered.
    """
    test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    response = requests.get(test_url, stream=True, timeout=timeout)
    return response.content
|
||||
|
||||
|
||||
def get_content(soup, css_class):
    """Return the stripped text of the first ``div`` with class *css_class*."""
    node = soup.find("div", class_=css_class)
    raw_text = node.text
    return raw_text.strip()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: fetch one signature and print the parsed record.
    link = "CU 8500 K64"
    # WebRequest() takes no arguments; the apparat is set via set_apparat().
    data = WebRequest().set_apparat(71).get_ppn(link).get_data()
    # The transformer expects a TransformerType and the fetched blocks.
    bib = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
    log.debug(bib)
|
||||
340
src/services/zotero.py
Normal file
340
src/services/zotero.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from pyzotero import zotero
|
||||
|
||||
from src import settings
|
||||
from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
@dataclass
class Creator:
    """One creator entry (author, editor, ...) of a Zotero item."""

    firstName: Optional[str] = None
    lastName: Optional[str] = None
    creatorType: str = "author"

    def from_dict(self, data: dict) -> None:
        """Set every key of *data* as an attribute on this creator."""
        for key, value in data.items():
            setattr(self, key, value)

    def from_string(self, data: str) -> "Creator":
        """Fill the name from a ``"Last, First"`` string.

        Text before the first comma becomes ``lastName``, text between the
        first and second comma becomes ``firstName``; both are stripped of
        surrounding whitespace. A string without a comma leaves both names
        unchanged.

        Returns:
            ``self``, to allow chaining.
        """
        if "," in data:
            last, first, *_ = data.split(",")
            # Names arrive as "Last, First" and lists as "A, B; C, D", so the
            # pieces regularly carry leading blanks.
            self.firstName = first.strip()
            self.lastName = last.strip()

        return self
|
||||
|
||||
# set __dict__ object to be used in json
|
||||
|
||||
|
||||
@dataclass
class Book:
    """Field container for a Zotero item of type ``book``."""

    itemType: str = "book"
    creators: list[Creator] = None
    tags: list = None
    collections: list = None
    relations: dict = None
    title: str = None
    abstractNote: str = None
    series: str = None
    seriesNumber: str = None
    volume: str = None
    numberOfVolumes: str = None
    edition: str = None
    place: str = None
    publisher: str = None
    date: str = None
    numPages: str = None
    language: str = None
    ISBN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None

    def to_dict(self) -> dict:
        """Return the instance attributes whose value is truthy."""
        return {name: content for name, content in self.__dict__.items() if content}
|
||||
|
||||
|
||||
@dataclass
class BookSection:
    """Field container for a Zotero item of type ``bookSection``."""

    itemType: str = "bookSection"
    title: str = None
    creators: list[Creator] = None
    abstractNote: str = None
    bookTitle: str = None
    series: str = None
    seriesNumber: str = None
    volume: str = None
    numberOfVolumes: str = None
    edition: str = None
    place: str = None
    publisher: str = None
    date: str = None
    pages: str = None
    language: str = None
    ISBN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None
    # These were written as ``tags = list`` etc., which binds the *type*
    # object as a class attribute instead of declaring a field, so they never
    # reached assign() or to_dict(). Declared like the same fields on Book.
    tags: list = None
    collections: list = None
    relations: dict = None

    def to_dict(self) -> dict:
        """Return the instance attributes whose value is truthy."""
        return {key: value for key, value in self.__dict__.items() if value}

    def assign(self, book) -> None:
        """Copy every attribute of *book* that this section also has.

        Args:
            book: Any object with a ``__dict__``. Attributes unknown to this
                class are ignored. ``itemType`` is copied as well, so callers
                reset it afterwards.
        """
        own_fields = self.__dict__
        for key, value in book.__dict__.items():
            if key in own_fields:
                setattr(self, key, value)
|
||||
|
||||
|
||||
@dataclass
class JournalArticle:
    """Field container for a Zotero item of type ``journalArticle``."""

    title: str = None
    creators: list[Creator] = None
    abstractNote: str = None
    publicationTitle: str = None
    volume: str = None
    issue: str = None
    pages: str = None
    date: str = None
    series: str = None
    seriesTitle: str = None
    seriesText: str = None
    journalAbbreviation: str = None
    language: str = None
    DOI: str = None
    ISSN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None
    # Declared as real fields. ``itemType`` used to be an unannotated class
    # attribute, so to_dict() dropped it unless a caller re-assigned it, and
    # ``tags = list`` etc. bound the type objects. They are appended after the
    # existing fields so positional construction keeps its meaning.
    itemType: str = "journalArticle"
    tags: list = None
    collections: list = None
    relations: dict = None

    def to_dict(self) -> dict:
        """Return the instance attributes whose value is truthy."""
        return {key: value for key, value in self.__dict__.items() if value}

    def assign(self, book) -> None:
        """Copy every attribute of *book* that this article also has.

        Args:
            book: Any object with a ``__dict__`` (not a plain ``dict``).
                Attributes unknown to this class are ignored.
        """
        own_fields = self.__dict__
        for key, value in book.__dict__.items():
            # The source's itemType (e.g. "book") must not replace ours; it
            # was never copied before itemType became a field either.
            if key != "itemType" and key in own_fields:
                setattr(self, key, value)
|
||||
|
||||
|
||||
class ZoteroController:
    """Create, delete and cite items in the configured Zotero library."""

    zoterocfg = settings.zotero

    def __init__(self):
        """Connect to Zotero, or leave ``self.zot`` as ``None`` if unconfigured."""
        # Always define the attribute so an unconfigured controller can be
        # recognised instead of failing on a missing attribute.
        self.zot = None
        if self.zoterocfg.library_id is None:
            return
        self.zot = zotero.Zotero(  # type: ignore
            self.zoterocfg.library_id,
            self.zoterocfg.library_type,
            self.zoterocfg.api_key,
        )

    def get_books(self) -> list:
        """Return the top-level library items whose item type is ``book``."""
        items = self.zot.top()  # type: ignore
        return [item for item in items if item["data"]["itemType"] == "book"]

    # create item in zotero
    # item is a part of a book
    def __get_data(self, isbn):
        """Fetch catalogue data for *isbn* and return the transformed record.

        Returns whatever ``BibTextTransformer.return_data`` yields in its
        default mode, which is ``None`` when nothing was found.
        """
        web = WebRequest()
        web.get_ppn(isbn)
        data = web.get_data_elsa()
        bib = BibTextTransformer()
        bib.get_data(data)
        return bib.return_data()

    def createBook(self, isbn) -> Book:
        """Build a ``Book`` from the catalogue record found for *isbn*.

        The title is cut at the first ``":"``. Authors are read from a
        ``;``-separated ``"Last, First"`` list, and entries without a comma
        are dropped.
        """
        book = self.__get_data(isbn)

        bookdata = Book()
        bookdata.title = book.title.split(":")[0]
        bookdata.ISBN = book.isbn
        bookdata.language = book.language
        bookdata.date = book.year
        bookdata.publisher = book.publisher
        bookdata.url = book.link
        bookdata.edition = book.edition
        bookdata.place = book.place
        bookdata.numPages = book.pages
        authors = [
            Creator().from_string(author).__dict__ for author in book.author.split(";")
        ]
        bookdata.creators = [
            author for author in authors if author["lastName"] is not None
        ]
        return bookdata

    def createItem(self, item) -> Optional[str]:
        """Upload *item* and return its Zotero key, or ``None`` on failure."""
        resp = self.zot.create_items([item])  # type: ignore
        # Index defensively: a response that has "successful" but no "0"
        # entry used to raise KeyError instead of returning None.
        created = resp.get("successful", {}).get("0")
        if created is None:
            log.error(f"Zotero did not create the item: {resp}")
            return None
        log.debug(resp)
        return created["key"]

    def deleteItem(self, key) -> None:
        """Delete the library item with the given key, if it is listed."""
        items = self.zot.items()
        for item in items:
            if item["key"] == key:
                self.zot.delete_item(item)  # type: ignore
                break

    def createHGSection(self, book: Book, data: dict) -> Optional[str]:
        """Create a section of an edited volume.

        The book's creators are stored as editors, and the people listed in
        ``data["section_author"]`` are added as authors.

        Args:
            book: Source of the shared bibliographic fields.
            data: Needs the keys ``pages``, ``chapter_title`` and
                ``section_author`` (``;``-separated ``"Last, First"``).

        Returns:
            The key of the created item, or ``None``.
        """
        log.debug(book)
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = data["chapter_title"]
        # assign() shares the creators list with ``book``; build new dicts and
        # a new list so the book's own creators are not turned into editors
        # or extended with the section authors.
        editors = [
            dict(creator, creatorType="editor") for creator in (chapter.creators or [])
        ]
        authors = [
            Creator().from_string(author).__dict__
            for author in data["section_author"].split(";")
        ]
        chapter.creators = editors + authors

        log.debug(chapter.to_dict())
        return self.createItem(chapter.to_dict())

    def createBookSection(self, book: Book, data: dict) -> Optional[str]:
        """Create an untitled book section carrying *book*'s data.

        Only ``data["pages"]`` is read; ISBN, URL and title are blanked.
        """
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = ""
        return self.createItem(chapter.to_dict())

    def createJournalArticle(self, journal, article) -> Optional[str]:
        """Create a journal article item.

        Args:
            journal: Object whose matching attributes are copied first.
            article: Mapping with the keys ``section_author``, ``year``,
                ``chapter_title``, ``work_title``, ``pages``, ``isbn`` and
                ``issue``. ``isbn`` is stored as both ISSN and URL.
        """
        journalarticle = JournalArticle()
        journalarticle.assign(journal)
        journalarticle.itemType = "journalArticle"
        journalarticle.creators = [
            Creator().from_string(author).__dict__
            for author in article["section_author"].split(";")
        ]
        journalarticle.date = article["year"]
        journalarticle.title = article["chapter_title"]
        journalarticle.publicationTitle = article["work_title"].split(":")[0].strip()
        journalarticle.pages = article["pages"]
        journalarticle.ISSN = article["isbn"]
        journalarticle.issue = article["issue"]
        journalarticle.url = article["isbn"]

        return self.createItem(journalarticle.to_dict())

    def get_citation(self, item) -> str:
        """Return the formatted citation of *item* as plain text.

        The bibliography entry is requested in the
        "deutsche-gesellschaft-fur-psychologie" style; italics and the
        wrapping ``div`` are removed and HTML entities are decoded.
        """
        import html  # local import: only this method needs it

        title = self.zot.item(  # type: ignore
            item,
            content="bib",
            style="deutsche-gesellschaft-fur-psychologie",
        )[0]
        for markup in ("<i>", "</i>", '<div class="csl-entry">', "</div>"):
            title = title.replace(markup, "")
        # Replacing "&" with "&" did nothing; decode the escaped entities.
        return html.unescape(title)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: build a Book from a catalogue signature and show it.
    zot = ZoteroController()
    book = zot.createBook("DV 3000 D649 (4)")
    # The former bare ``zot.createBookSection()`` call always raised
    # TypeError because the method requires ``book`` and ``data``. To upload
    # a section, call e.g. zot.createBookSection(book, {"pages": "..."}).
    log.debug(book)
|
||||
|
||||
# book = Book()
|
||||
# # # book.
|
||||
# ISBN = "9783801718718"
|
||||
# book = createBook(isbn=ISBN)
|
||||
# chapter = BookSection()
|
||||
# chapter.title = "Geistige Behinderung"
|
||||
# chapter.bookTitle = book.title
|
||||
# chapter.pages = "511 - 538"
|
||||
# chapter.publisher = book.publisher
|
||||
# authors = [
|
||||
# Creator("Jennifer M.", "Phillips").__dict__,
|
||||
# Creator("Hower", "Kwon").__dict__,
|
||||
# Creator("Carl", "Feinstein").__dict__,
|
||||
# Creator("Inco", "Spintczok von Brisinski").__dict__,
|
||||
# ]
|
||||
# publishers = book.author
|
||||
# if isinstance(publishers, str):
|
||||
# publishers = [publishers]
|
||||
# for publisher in publishers:
|
||||
# # #print(publisher)
|
||||
# creator = Creator().from_string(publisher)
|
||||
# creator.creatorType = "editor"
|
||||
# authors.append(creator.__dict__)
|
||||
|
||||
# chapter.creators = authors
|
||||
# chapter.publisher = book.publisher
|
||||
# # #print(chapter.to_dict())
|
||||
# createBookSection(chapter.to_dict())
|
||||
# get_citation("9ZXH8DDE")
|
||||
# # # #print()
|
||||
# # #print(get_books())
|
||||
# # #print(zot.item_creator_types("bookSection"))
|
||||
6
src/shared/__init__.py
Normal file
6
src/shared/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Shared utilities and cross-cutting concerns."""
|
||||
|
||||
from .logging import log
|
||||
from .config import Settings, load_config
|
||||
|
||||
__all__ = ["log", "Settings", "load_config"]
|
||||
66
src/shared/config.py
Normal file
66
src/shared/config.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Application configuration and settings."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
@dataclass
|
||||
class Settings:
|
||||
"""Settings for the application."""
|
||||
|
||||
save_path: str
|
||||
database_name: str
|
||||
database_path: str
|
||||
bib_id: str = ""
|
||||
default_apps: bool = True
|
||||
custom_applications: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
def save_settings(self, config_path: str | Path = "config.yaml") -> None:
|
||||
"""Save the settings to the config file.
|
||||
|
||||
Args:
|
||||
config_path: Path to the configuration file
|
||||
"""
|
||||
try:
|
||||
with open(config_path, "w") as f:
|
||||
yaml.dump(self.__dict__, f)
|
||||
log.info(f"Settings saved to {config_path}")
|
||||
except Exception as e:
|
||||
log.error(f"Failed to save settings: {e}")
|
||||
raise
|
||||
|
||||
@classmethod
|
||||
def load_settings(cls, config_path: str | Path = "config.yaml") -> dict[str, Any]:
|
||||
"""Load the settings from the config file.
|
||||
|
||||
Args:
|
||||
config_path: Path to the configuration file
|
||||
|
||||
Returns:
|
||||
Dictionary containing the loaded settings
|
||||
"""
|
||||
try:
|
||||
with open(config_path, "r") as f:
|
||||
data = yaml.safe_load(f)
|
||||
log.info(f"Settings loaded from {config_path}")
|
||||
return data
|
||||
except Exception as e:
|
||||
log.error(f"Failed to load settings: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def load_config(config_path: str | Path = "config.yaml") -> dict[str, Any]:
    """Load the configuration file; shortcut for ``Settings.load_settings``.

    Args:
        config_path: Path to the configuration file

    Returns:
        Dictionary containing the loaded settings
    """
    loaded = Settings.load_settings(config_path)
    return loaded
|
||||
25
src/shared/logging.py
Normal file
25
src/shared/logging.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import sys
|
||||
|
||||
import loguru
|
||||
|
||||
from src import LOG_DIR
|
||||
|
||||
log = loguru.logger
|
||||
_configured = False
|
||||
|
||||
|
||||
def configure(level: str = "INFO", to_stdout: bool = True, rotate_bytes: str = "1 MB"):
    """Install the application's log sinks once and return the logger.

    Only the first call has any effect; later calls return the logger as is.

    Args:
        level: Level passed to the stdout sink.
        to_stdout: Whether to add a sink writing to ``sys.stdout``.
        rotate_bytes: Rotation setting of the ``application.log`` file sink.
    """
    global _configured
    if not _configured:
        # Drop the sinks registered so far before adding our own.
        log.remove()
        if to_stdout:
            log.add(sys.stdout, level=level)
        # application rolling log
        log.add(
            f"{LOG_DIR}/application.log",
            rotation=rotate_bytes,
            retention="10 days",
        )
        _configured = True
    return log
|
||||
BIN
src/sounds/ding.mp3
Normal file
BIN
src/sounds/ding.mp3
Normal file
Binary file not shown.
BIN
src/sounds/error.mp3
Normal file
BIN
src/sounds/error.mp3
Normal file
Binary file not shown.
@@ -2,14 +2,15 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field as dataclass_field
|
||||
from typing import Any, List
|
||||
|
||||
import loguru
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.logic.dataclass import BookData
|
||||
import loguru
|
||||
import sys
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
@@ -36,6 +37,7 @@ class Item:
|
||||
department: str | None = dataclass_field(default_factory=str)
|
||||
locationhref: str | None = dataclass_field(default_factory=str)
|
||||
location: str | None = dataclass_field(default_factory=str)
|
||||
ktrl_nr: str | None = dataclass_field(default_factory=str)
|
||||
|
||||
def from_dict(self, data: dict):
|
||||
"""Import data from dict"""
|
||||
@@ -382,6 +384,8 @@ class RDSData:
|
||||
def transform(self, data: str):
|
||||
# rds_availability = RDS_AVAIL_DATA()
|
||||
# rds_data = RDS_GENERIC_DATA()
|
||||
print(data)
|
||||
|
||||
def __get_raw_data(data: str) -> list:
|
||||
# create base data to be turned into pydantic classes
|
||||
data = data.split("RDS ----------------------------------")[1]
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
from PySide6 import QtCore, QtGui, QtWidgets
|
||||
|
||||
from src.logic.dataclass import BookData
|
||||
from src.core.models import BookData
|
||||
|
||||
|
||||
class Ui_Dialog(object):
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
from PySide6 import QtCore, QtGui, QtWidgets
|
||||
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
|
||||
|
||||
class Ui_Dialog(object):
|
||||
|
||||
@@ -10,8 +10,9 @@ import hashlib
|
||||
|
||||
from PySide6 import QtCore, QtWidgets
|
||||
|
||||
from src.backend.database import Database
|
||||
from src.backend.admin_console import AdminCommands
|
||||
from src.admin import AdminCommands
|
||||
from src.database import Database
|
||||
|
||||
|
||||
class Ui_Dialog(object):
|
||||
def setupUi(self, Dialog):
|
||||
@@ -64,13 +65,11 @@ class Ui_Dialog(object):
|
||||
def login(self):
|
||||
username = self.lineEdit.text()
|
||||
password = self.lineEdit_2.text()
|
||||
print(type(username), password)
|
||||
# print(type(username), password)
|
||||
# Assuming 'Database' is a class to interact with your database
|
||||
db = Database()
|
||||
db = Database()
|
||||
|
||||
hashed_password = hashlib.sha256(
|
||||
password.encode()
|
||||
).hexdigest()
|
||||
hashed_password = hashlib.sha256(password.encode()).hexdigest()
|
||||
if len(db.getUsers()) == 0:
|
||||
AdminCommands().create_admin()
|
||||
self.lresult = 1 # Indicate successful login
|
||||
|
||||
@@ -6,17 +6,18 @@
|
||||
# run again. Do not edit this file unless you know what you are doing.
|
||||
|
||||
|
||||
from PySide6 import QtCore, QtGui, QtWidgets
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from omegaconf import OmegaConf
|
||||
from PySide6 import QtCore, QtWidgets
|
||||
|
||||
config = OmegaConf.load("config.yaml")
|
||||
|
||||
class Ui_eMailPreview(object):
|
||||
|
||||
class Ui_eMailPreview(object):
|
||||
def setupUi(
|
||||
self,
|
||||
eMailPreview,
|
||||
@@ -31,7 +32,10 @@ class Ui_eMailPreview(object):
|
||||
self.buttonBox = QtWidgets.QDialogButtonBox(eMailPreview)
|
||||
self.buttonBox.setGeometry(QtCore.QRect(310, 630, 341, 32))
|
||||
self.buttonBox.setOrientation(QtCore.Qt.Orientation.Horizontal)
|
||||
self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.StandardButton.Cancel|QtWidgets.QDialogButtonBox.StandardButton.Ok)
|
||||
self.buttonBox.setStandardButtons(
|
||||
QtWidgets.QDialogButtonBox.StandardButton.Cancel
|
||||
| QtWidgets.QDialogButtonBox.StandardButton.Ok
|
||||
)
|
||||
self.buttonBox.setObjectName("buttonBox")
|
||||
self.gridLayoutWidget = QtWidgets.QWidget(eMailPreview)
|
||||
self.gridLayoutWidget.setGeometry(QtCore.QRect(10, 10, 661, 621))
|
||||
@@ -46,7 +50,11 @@ class Ui_eMailPreview(object):
|
||||
self.prof_name.setObjectName("prof_name")
|
||||
self.gridLayout.addWidget(self.prof_name, 2, 2, 1, 1)
|
||||
self.label_3 = QtWidgets.QLabel(self.gridLayoutWidget)
|
||||
self.label_3.setAlignment(QtCore.Qt.AlignmentFlag.AlignLeading|QtCore.Qt.AlignmentFlag.AlignLeft|QtCore.Qt.AlignmentFlag.AlignTop)
|
||||
self.label_3.setAlignment(
|
||||
QtCore.Qt.AlignmentFlag.AlignLeading
|
||||
| QtCore.Qt.AlignmentFlag.AlignLeft
|
||||
| QtCore.Qt.AlignmentFlag.AlignTop
|
||||
)
|
||||
self.label_3.setObjectName("label_3")
|
||||
self.gridLayout.addWidget(self.label_3, 5, 0, 1, 1)
|
||||
self.mail_name = QtWidgets.QLineEdit(self.gridLayoutWidget)
|
||||
@@ -81,7 +89,12 @@ class Ui_eMailPreview(object):
|
||||
self.gender_non = QtWidgets.QRadioButton(self.gridLayoutWidget)
|
||||
self.gender_non.setObjectName("gender_non")
|
||||
self.horizontalLayout_3.addWidget(self.gender_non)
|
||||
spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Policy.Expanding, QtWidgets.QSizePolicy.Policy.Minimum)
|
||||
spacerItem = QtWidgets.QSpacerItem(
|
||||
40,
|
||||
20,
|
||||
QtWidgets.QSizePolicy.Policy.Expanding,
|
||||
QtWidgets.QSizePolicy.Policy.Minimum,
|
||||
)
|
||||
self.horizontalLayout_3.addItem(spacerItem)
|
||||
self.gridLayout.addLayout(self.horizontalLayout_3, 4, 2, 1, 1)
|
||||
self.label_6 = QtWidgets.QLabel(self.gridLayoutWidget)
|
||||
@@ -89,8 +102,8 @@ class Ui_eMailPreview(object):
|
||||
self.gridLayout.addWidget(self.label_6, 4, 0, 1, 1)
|
||||
|
||||
self.retranslateUi(eMailPreview)
|
||||
self.buttonBox.accepted.connect(eMailPreview.accept) # type: ignore
|
||||
self.buttonBox.rejected.connect(eMailPreview.reject) # type: ignore
|
||||
self.buttonBox.accepted.connect(eMailPreview.accept) # type: ignore
|
||||
self.buttonBox.rejected.connect(eMailPreview.reject) # type: ignore
|
||||
QtCore.QMetaObject.connectSlotsByName(eMailPreview)
|
||||
self._appid = app_id
|
||||
self._appname = app_name
|
||||
@@ -127,7 +140,6 @@ class Ui_eMailPreview(object):
|
||||
return "Sehr geehrte Frau"
|
||||
elif self.gender_non.isChecked():
|
||||
return "Guten Tag"
|
||||
|
||||
|
||||
def set_mail(self):
|
||||
email_template = self.comboBox.currentText()
|
||||
@@ -145,14 +157,19 @@ class Ui_eMailPreview(object):
|
||||
mail_html = mail_template.split("<html>")[1]
|
||||
mail_html = "<html>" + mail_html
|
||||
mail_html = mail_html.format(
|
||||
Profname=self.prof_name.text().split(" ")[1], Appname=self._appname, AppNr=self._appid, AppSubject = self._subject,greeting = self.get_greeting()
|
||||
Profname=self.prof_name.text().split(" ")[1],
|
||||
Appname=self._appname,
|
||||
AppNr=self._appid,
|
||||
AppSubject=self._subject,
|
||||
greeting=self.get_greeting(),
|
||||
)
|
||||
|
||||
self.mail_body.setHtml(mail_html)
|
||||
|
||||
def load_mail_templates(self):
    """Add every ``.eml`` file found in ``mail_vorlagen`` to the combo box.

    The directory name is relative, so it is resolved against the current
    working directory; ``os.listdir`` raises ``FileNotFoundError`` when it
    does not exist. Entries are added in the order ``os.listdir`` returns
    them.
    """
    mail_templates = [
        entry for entry in os.listdir("mail_vorlagen") if entry.endswith(".eml")
    ]
    self.comboBox.addItems(mail_templates)
|
||||
|
||||
def save_mail(self):
|
||||
@@ -168,16 +185,17 @@ class Ui_eMailPreview(object):
|
||||
) as f:
|
||||
f.write(mail)
|
||||
self.mail_path = f.name
|
||||
print(self.mail_path)
|
||||
# print(self.mail_path)
|
||||
# open the file using thunderbird
|
||||
subprocess.Popen([f"{self.mail_path}"])
|
||||
# delete the file
|
||||
# os.remove(self.mail_path)
|
||||
|
||||
|
||||
def launch():
    """Show the e-mail preview dialog on its own, filled with sample values.

    Creates a ``QApplication``, builds ``Ui_eMailPreview`` on a plain
    ``QDialog`` using hard-coded sample arguments, shows the dialog and
    runs the application's event loop.
    """
    app = QtWidgets.QApplication([])
    eMailPreview = QtWidgets.QDialog()
    ui = Ui_eMailPreview()
    # Set the dialog up exactly once: a second setupUi() call on the same
    # dialog would create every widget a second time.
    ui.setupUi(eMailPreview, "1", "Test", "Biologie", "Kirchner, Alexander")
    eMailPreview.show()
    # Run the event loop a single time; this call returns when the
    # application quits.
    app.exec()
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE TS>
|
||||
<TS version="2.1">
|
||||
</TS>
|
||||
@@ -110,7 +110,7 @@ class Ui_Form(object):
|
||||
self.progressBar.setValue(value)
|
||||
|
||||
def thread_quit(self):
|
||||
print("Terminating thread")
|
||||
# print("Terminating thread")
|
||||
self.thread.terminate()
|
||||
self.thread.quit()
|
||||
self.thread.deleteLater()
|
||||
@@ -144,7 +144,7 @@ class Ui_Form(object):
|
||||
def determine_progress(self, signal):
|
||||
# check length of listWidget
|
||||
length = self.listWidget.count()
|
||||
print(f"Length of listWidget: {length}")
|
||||
# print(f"Length of listWidget: {length}")
|
||||
if length == 0:
|
||||
logger.log_info("AutoAdder finished")
|
||||
self.buttonBox.accepted.emit()
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# run again. Do not edit this file unless you know what you are doing.
|
||||
|
||||
|
||||
from PySide6 import QtCore, QtGui, QtWidgets
|
||||
from PySide6 import QtCore, QtWidgets
|
||||
|
||||
|
||||
class Ui_Dialog(object):
|
||||
|
||||
@@ -169,7 +169,7 @@ class Ui_Dialog(object):
|
||||
name = application.application
|
||||
file_type = application.extensions
|
||||
display_name = application.name
|
||||
print(name, file_type, display_name) #
|
||||
# print(name, file_type, display_name) #
|
||||
# create new item
|
||||
item = QtWidgets.QTreeWidgetItem(self.treeWidget)
|
||||
item.setText(0, display_name)
|
||||
|
||||
@@ -12,20 +12,23 @@ __all__ = [
|
||||
"ElsaAddEntry",
|
||||
"ApparatExtendDialog",
|
||||
"DocumentPrintDialog",
|
||||
"NewEditionDialog",
|
||||
"Settings",
|
||||
"DeleteDialog",
|
||||
]
|
||||
from .about import About
|
||||
from .app_ext import ApparatExtendDialog
|
||||
from .bookdata import BookDataUI
|
||||
from .deletedialog import DeleteDialog
|
||||
from .docuprint import DocumentPrintDialog
|
||||
from .elsa_add_entry import ElsaAddEntry
|
||||
from .elsa_gen_confirm import ElsaGenConfirm
|
||||
from .login import LoginDialog
|
||||
from .mail import Mail_Dialog
|
||||
from .mailTemplate import MailTemplateDialog
|
||||
from .medienadder import MedienAdder
|
||||
from .newEdition import NewEditionDialog
|
||||
from .parsed_titles import ParsedTitles
|
||||
from .popup_confirm import ConfirmDialog as popus_confirm
|
||||
from .reminder import ReminderDialog
|
||||
from .about import About
|
||||
from .elsa_gen_confirm import ElsaGenConfirm
|
||||
from .elsa_add_entry import ElsaAddEntry
|
||||
from .app_ext import ApparatExtendDialog
|
||||
from .docuprint import DocumentPrintDialog
|
||||
|
||||
from .settings import Settings
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from .dialog_sources.Ui_about import Ui_about
|
||||
from PySide6 import QtWidgets
|
||||
import PySide6
|
||||
from src import Icon, __version__, __author__
|
||||
from PySide6 import QtWidgets
|
||||
|
||||
from src import Icon, __author__, __version__
|
||||
|
||||
from .dialog_sources.about_ui import Ui_about
|
||||
|
||||
|
||||
class About(QtWidgets.QDialog, Ui_about):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user