34 Commits

Author SHA1 Message Date
d316601e9a fix: issues (1) 2026-02-12 08:54:19 +01:00
8ec92a685c move pytest config into pyproject.toml 2026-02-10 15:12:09 +01:00
29824e8c04 refactor: update documentation server implementation and improve semester class logic 2026-02-10 14:59:34 +01:00
2e5cda6689 bump python version, ruff check 2026-02-10 14:33:54 +01:00
639afe9b95 add prek, tanuki 2026-02-10 12:40:31 +01:00
bcb96213ee Refactor code structure for improved readability and maintainability 2025-12-10 13:47:34 +01:00
67f967aa09 test: lower python version to 3.12 due to nuitka compiling issues 2025-12-03 11:19:29 +01:00
1e320d68c9 fix(ci): fix python version in build to test 3.13 2025-12-03 09:47:10 +01:00
0f41e8b226 experimental(ci): test build with python 3.13 2025-12-02 16:29:32 +01:00
c5099500a2 use correct artifact downloader 2025-12-02 16:22:43 +01:00
759ad0ff0b test new artifact upload 2025-12-02 16:18:22 +01:00
b8e8b87047 fix(ci): add step to create documentation artifact 2025-12-02 15:59:06 +01:00
9d0151a6d1 fix(ci): remove linux build due to lacking power 2025-12-02 15:48:31 +01:00
bfe9b24359 add patchelf for linux 2025-12-01 14:47:22 +01:00
b0e170e2ba fix documentation command 2025-12-01 14:31:39 +01:00
05289ef244 chore(codebase): import fixes, restructuring 2025-12-01 14:24:24 +01:00
6523ad655c Merge pull request 'dev-restructure-dir' (#30) from dev-restructure-dir into dev
Reviewed-on: #30
2025-12-01 12:00:13 +00:00
4eebc922c7 Add src/__init__.py 2025-12-01 12:00:06 +00:00
b05e4eb17f Delete src/__init__.py 2025-12-01 11:58:35 +00:00
dbfcdbd013 Add Adminbereich section with overview and user management pages to navigation, other changes 2025-12-01 12:57:15 +01:00
c7304b484a chore(ci): switch to use uv bump instead of bump-my-version 2025-12-01 11:03:22 +01:00
085d4a9075 chore(deps): add regex, pdf deps 2025-12-01 10:55:28 +01:00
1dba9730c5 Merge pull request 'dev-restructure-dir' (#28) from dev-restructure-dir into dev
Reviewed-on: #28
2025-12-01 09:53:29 +00:00
760f5d1463 chore(ci): switch to only bump if all steps succeed 2025-12-01 10:52:44 +01:00
59d52736a0 chore(ci): change to use gitea wiki instead 2025-12-01 10:42:25 +01:00
4b4711f045 Merge pull request 'Merge restructure branch into dev' (#25) from dev-restructure-dir into dev
Reviewed-on: #25
2025-12-01 09:38:49 +00:00
3da1c14b63 chore(ci): move workflow into correct dir 2025-12-01 10:38:22 +01:00
8491c41428 chore(docs): add zensical setup 2025-12-01 10:37:12 +01:00
cee3379203 feat(docs): migrate to zensical, re-structure docs 2025-12-01 10:36:36 +01:00
9dd4b0328e chore(project): add missing dependencies to config 2025-11-27 14:08:56 +01:00
Gitea CI
d6883b0388 Bump version: 1.0.1 → 1.0.2 2025-11-24 13:13:19 +00:00
9e64d10bf4 Merge pull request 'remove index' (#22) from dev-restructure-dir into dev
Reviewed-on: #22
2025-11-24 13:12:37 +00:00
9f1dfa1030 remove index 2025-11-24 14:11:59 +01:00
Gitea CI
8c42d5fa45 Bump version: 1.0.0 → 1.0.1 2025-11-24 13:06:49 +00:00
209 changed files with 11634 additions and 10102 deletions

63
.gitea/workflows/docs.yml Normal file
View File

@@ -0,0 +1,63 @@
name: Documentation

on:
  push:
    branches:
      - master
      - main
    paths:
      - "docs/**"
      - "zensical.toml"

jobs:
  deploy-wiki:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Checkout wiki
        uses: actions/checkout@v5
        with:
          repository: ${{ gitea.repository }}.wiki
          path: wiki
          token: ${{ secrets.GITEA_TOKEN }}
      - name: Copy docs to wiki
        run: |
          # Remove old wiki content (except .git)
          find wiki -mindepth 1 -maxdepth 1 ! -name '.git' -exec rm -rf {} +
          # Copy markdown files maintaining structure
          cp -r docs/* wiki/
          # Rename index.md to Home.md for wiki homepage
          if [ -f wiki/index.md ]; then
            mv wiki/index.md wiki/Home.md
          fi
          # Flatten folder structure for Gitea wiki compatibility:
          # move files from subfolders to the wiki root with prefixed names.
          for dir in wiki/*/; do
            if [ -d "$dir" ]; then
              dirname=$(basename "$dir")
              for file in "$dir"*.md; do
                if [ -f "$file" ]; then
                  filename=$(basename "$file")
                  if [ "$filename" = "index.md" ]; then
                    # A folder's index becomes "<folder>.md"
                    mv "$file" "wiki/${dirname}.md"
                  else
                    # Other pages become "<folder>-<file>.md" so flattened names stay unique
                    mv "$file" "wiki/${dirname}-${filename}"
                  fi
                fi
              done
              rm -rf "$dir"
            fi
          done
      - name: Push to wiki
        run: |
          cd wiki
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
          git add -A
          # Only commit when the copy actually changed something
          git diff --staged --quiet || git commit -m "Update wiki from docs [skip ci]"
          git push

View File

@@ -40,7 +40,7 @@ jobs:
fetch-tags: true
- name: Install uv
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v7
- name: Set up Python
uses: actions/setup-python@v5
@@ -48,29 +48,29 @@ jobs:
# Uses the version specified in pyproject.toml
python-version-file: "pyproject.toml"
- name: Set Git identity
run: |
git config user.name "Gitea CI"
git config user.email "ci@git.theprivateserver.de"
- name: Bump version
- name: Bump version (local only)
id: bump
run: |
uv tool install bump-my-version
uv version --bump "${{ github.event.inputs.bump }}"
uv tool run bump-my-version bump "${{ github.event.inputs.bump }}"
version="$(uv tool run bump-my-version show current_version)"
version="$(uv version --short)"
echo "VERSION=$version" >> "$GITHUB_ENV"
echo "version=$version" >> "$GITHUB_OUTPUT"
echo "tag=v$version" >> "$GITHUB_OUTPUT"
# no env needed here, uv handles the Python it installs
- name: Push changes
uses: ad-m/github-push-action@master
- name: Install all dependencies
run: uv sync --all-groups
- name: Build documentation
run: uv run zensical build --clean
- name: Upload documentation artifact
uses: https://github.com/christopherHX/gitea-upload-artifact@v4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
branch: ${{ github.ref }}
name: site
path: site/
retention-days: 1
- name: Build Changelog
id: build_changelog
@@ -82,83 +82,91 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
build-linux:
needs: prepare
runs-on: ubuntu-latest
env:
VERSION: ${{ needs.prepare.outputs.version }}
TAG_NAME: ${{ needs.prepare.outputs.tag }}
# build-linux:
# needs: prepare
# runs-on: ubuntu-latest
# env:
# VERSION: ${{ needs.prepare.outputs.version }}
# TAG_NAME: ${{ needs.prepare.outputs.tag }}
steps:
- name: Checkout code
uses: actions/checkout@v5
with:
fetch-depth: 0
fetch-tags: true
# steps:
# - name: Checkout code
# uses: actions/checkout@v5
# with:
# fetch-depth: 0
# fetch-tags: true
- name: Install uv
uses: astral-sh/setup-uv@v5
# - name: Install uv
# uses: astral-sh/setup-uv@v7
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version-file: "pyproject.toml"
- name: Install all dependencies
run: uv sync --all-groups
# - name: Install all dependencies
# run: uv sync --all-groups
- name: Build Linux release with Nuitka
run: |
uv run python -m nuitka \
--standalone \
--output-dir=dist \
--include-data-dir=./config=config \
--include-data-dir=./site=site \
--include-data-dir=./icons=icons \
--include-data-dir=./mail_vorlagen=mail_vorlagen \
--enable-plugin=pyside6 \
--product-name=SemesterApparatsManager \
--product-version=${VERSION} \
--output-filename=SAM \
main.py
# - name: Build documentation
# run: uv run zensical build --clean
- name: Prepare Linux Release Artifact
run: |
mkdir -p releases
cd dist/SemesterApparatsManager.dist
zip -r "../../releases/SAM-linux-v${VERSION}.zip" *
cd ../../
# - name: Build Linux release with Nuitka
# run: |
# uv add patchelf
# uv run python -m nuitka \
# --standalone \
# --output-dir=dist \
# --include-data-dir=./config=config \
# --include-data-dir=./site=site \
# --include-data-dir=./icons=icons \
# --include-data-dir=./mail_vorlagen=mail_vorlagen \
# --enable-plugin=pyside6 \
# --product-name=SemesterApparatsManager \
# --product-version=${VERSION} \
# --output-filename=SAM \
# main.py
- name: Create / Update Gitea Release (Linux asset + changelog)
if: ${{ github.event.inputs.github_release == 'true' }}
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ env.TAG_NAME }}
name: Release ${{ env.TAG_NAME }}
body: ${{ needs.prepare.outputs.changelog }}
draft: false
prerelease: ${{ github.event.inputs.prerelease }}
make_latest: true
files: |
releases/SAM-linux-v${{ env.VERSION }}.zip
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
# - name: Prepare Linux Release Artifact
# run: |
# mkdir -p releases
# cd dist/SemesterApparatsManager.dist
# zip -r "../../releases/SAM-linux-v${VERSION}.zip" *
# cd ../../
# - name: Create / Update Gitea Release (Linux asset + changelog)
# if: ${{ github.event.inputs.github_release == 'true' }}
# uses: softprops/action-gh-release@v2
# with:
# tag_name: ${{ env.TAG_NAME }}
# name: Release ${{ env.TAG_NAME }}
# body: ${{ needs.prepare.outputs.changelog }}
# draft: false
# prerelease: ${{ github.event.inputs.prerelease }}
# make_latest: true
# files: |
# releases/SAM-linux-v${{ env.VERSION }}.zip
# env:
# GITHUB_TOKEN: ${{ secrets.TOKEN }}
# GITHUB_REPOSITORY: ${{ github.repository }}
build-windows:
needs: [prepare, build-linux]
needs: prepare
runs-on: windows-latest
env:
VERSION: ${{ needs.prepare.outputs.version }}
TAG_NAME: ${{ needs.prepare.outputs.tag }}
UV_PATH: 'C:\Users\gitea_runner_windows\.local\bin\uv.exe'
UV_NO_PROJECT: "1"
UV_NO_CONFIG: "1"
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Download documentation artifact
uses: christopherhx/gitea-download-artifact@v4
with:
name: site
path: site/
- name: Ensure Python via uv
shell: powershell
run: |
@@ -167,6 +175,8 @@ jobs:
exit 1
}
& $env:UV_PATH self update
$version = "3.12"
Write-Host "Checking for Python $version via uv..."
$exists = & $env:UV_PATH python list | Select-String $version -Quiet
@@ -209,15 +219,53 @@ jobs:
Compress-Archive -Path * -DestinationPath "..\releases\SAM-windows-v${env:VERSION}.zip" -Force
Set-Location ..
- name: Attach Windows asset to Gitea Release
- name: Create / Update Gitea Release (Windows asset + changelog)
if: ${{ github.event.inputs.github_release == 'true' }}
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ env.TAG_NAME }}
name: Release ${{ env.TAG_NAME }}
body: ${{ needs.prepare.outputs.changelog }}
draft: false
prerelease: ${{ github.event.inputs.prerelease }}
make_latest: true
files: |
releases/SAM-windows-v${{ env.VERSION }}.zip
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
# GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_REPOSITORY: ${{ github.repository }}
finalize:
needs: [prepare, build-windows]
runs-on: ubuntu-latest
env:
VERSION: ${{ needs.prepare.outputs.version }}
steps:
- name: Checkout code
uses: actions/checkout@v5
with:
fetch-depth: 0
fetch-tags: true
- name: Install uv
uses: astral-sh/setup-uv@v7
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"
- name: Set Git identity
run: |
git config user.name "Gitea CI"
git config user.email "ci@git.theprivateserver.de"
- name: Bump version and push
run: |
uv version --bump "${{ github.event.inputs.bump }}"
- name: Push version bump
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
branch: ${{ github.ref }}

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "docs/themes/tanuki"]
path = docs/themes/tanuki
url = https://github.com/raskell-io/tanuki

13
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,13 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
hooks:
# Run the formatter
- id: ruff-format
name: ruff format
types_or: [python, pyi, jupyter]
# Run the linter with auto-fix
- id: ruff
name: ruff check
args: [--fix]
types_or: [python, pyi, jupyter]

View File

@@ -1 +1 @@
3.13
3.14

View File

@@ -1 +1 @@
1.0.0
1.0.2

View File

@@ -32,6 +32,7 @@
"cSpell.words": [
"adis",
"Adminbereich",
"akkey",
"Apparatdetails",
"apparate",
"appname",
@@ -54,4 +55,7 @@
"Strg",
"telnr"
],
"yaml.schemas": {
"https://www.schemastore.org/github-workflow.json": "file:///c%3A/Users/aky547/GitHub/SemesterapparatsManager/.gitea/workflows/release.yml"
},
}

View File

@@ -373,26 +373,31 @@ CREATE TABLE IF NOT EXISTS user_preferences (
2. Convert: `pyside6-uic dialog.ui -o dialog_ui.py`
3. Create dialog class in `src/ui/dialogs/`
4. Connect signals to business logic
### Building Documentation
```bash
# Using uv
uv run mkdocs build
uv run mkdocs serve # View at http://localhost:8000
## 📚 Documentation
# Or with activated venv
mkdocs build
mkdocs serve
```
- **[API Documentation](docs/)**: Detailed module documentation
- **[User Manual](docs/index.md)**: Complete user guide (MkDocs)
- **[User Manual](docs/)**: Complete user guide built with Zola and the Tanuki theme
- View documentation at `http://localhost:8000` when running the application
### Building Documentation
The documentation is built using [Zola](https://www.getzola.org/) with the Tanuki theme.
```bash
mkdocs build
mkdocs serve # View at http://localhost:8000
# Build documentation using the provided script
.\build_docs.ps1
# Or manually:
cd docs
zola build
# Serve documentation locally for development
cd docs
zola serve # View at http://127.0.0.1:1111
```
The built documentation is served automatically when you run the application and access the documentation menu.
## 🤝 Contributing
Contributions are welcome! Please follow these guidelines:

View File

@@ -1,9 +1,10 @@
from typing import Optional, Any, Union
from dataclasses import dataclass
from omegaconf import OmegaConf, DictConfig
from omegaconf import OmegaConf, DictConfig, ListConfig
import os
from pathlib import Path
@dataclass
class OpenAI:
api_key: str
@@ -15,6 +16,7 @@ class OpenAI:
def _setattr(self, name: str, value: Any):
setattr(self, name, value)
@dataclass
class Zotero:
api_key: str
@@ -33,6 +35,7 @@ class Database:
name: str
path: Union[str, Path, None]
temp: Union[str, Path, None]
def getattr(self, name: str):
return getattr(self, name)
@@ -45,6 +48,7 @@ class Database:
if isinstance(self.temp, str):
self.temp = Path(self.temp).expanduser()
@dataclass
class Mail:
smtp_server: str
@@ -136,8 +140,9 @@ class Config:
"""
_config: Optional[DictConfig] = None
_config: Optional[Union[DictConfig, ListConfig]] = None
config_exists: bool = True
def __init__(self, config_path: str):
"""
Loads the configuration file and stores it for future access.
@@ -178,22 +183,25 @@ class Config:
"""
Reloads the configuration from the file.
"""
self._config = OmegaConf.load(self.config_path)
if self.config_path is not None:
self._config = OmegaConf.load(self.config_path)
@property
def zotero(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
return Zotero(**self._config.zotero)
@property
def zotero_attr(self, name: str):
def get_zotero_attr(self, name: str):
return getattr(self.zotero, name)
@zotero_attr.setter
def zotero_attr(self, name: str, value: Any):
def set_zotero_attr(self, name: str, value: Any):
self.zotero._setattr(name, value)
@property
def database(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
return Database(**self._config.database)
@property
@@ -206,43 +214,57 @@ class Config:
@property
def openAI(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
return OpenAI(**self._config.openAI)
@property
def mail(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
return Mail(**self._config.mail)
def mail_attr(self, name: str):
return getattr(self.mail, name)
def set_mail_attr(self, name: str, value: Any):
OmegaConf.update(self._config, f"mail.{name}", value)
if self._config is not None:
OmegaConf.update(self._config, f"mail.{name}", value)
def set_database_attr(self, name: str, value: Any):
OmegaConf.update(self._config, f"database.{name}", value)
if self._config is not None:
OmegaConf.update(self._config, f"database.{name}", value)
def set_zotero_attr(self, name: str, value: Any):
OmegaConf.update(self._config, f"zotero.{name}", value)
if self._config is not None:
OmegaConf.update(self._config, f"zotero.{name}", value)
def set_openai_attr(self, name: str, value: Any):
OmegaConf.update(self._config, f"openAI.{name}", value)
if self._config is not None:
OmegaConf.update(self._config, f"openAI.{name}", value)
def set_icon_attr(self, name: str, value: Any):
OmegaConf.update(self._config, f"icons.{name}", value)
if self._config is not None:
OmegaConf.update(self._config, f"icons.{name}", value)
@property
def save_path(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
return self._config.save_path
@save_path.setter
def save_path(self, value: str):
self._config.save_path = value
if self._config is not None:
self._config.save_path = value
def load_config(self, path, filename):
return OmegaConf.load(os.path.join(path, filename))
@property
def icons(self):
if self._config is None:
raise RuntimeError("Configuration not loaded")
icons = Icons()
icons.assign("path", self._config.icon_path)
icons.assign("colors", self._config.colors)

24
dev/compile_modified.py Normal file
View File

@@ -0,0 +1,24 @@
"""Byte-compile a fixed list of recently modified modules.

Each path is compiled with ``py_compile`` and a per-file OK/ERROR line is
printed. When run as a script, the process exit status equals the number
of files that failed to compile, so CI treats any nonzero status as a
failure.
"""

import py_compile
import sys

# Modules touched by the latest change set; extend as needed.
paths = [
    'src/ui/widgets/new_edition_check.py',
    'src/utils/icon.py',
    'src/ui/widgets/graph.py',
    'src/ui/userInterface.py',
    'src/ui/dialogs/mailTemplate.py',
    'src/services/catalogue.py',
    'src/backend/catalogue.py',
    'src/parsers/xml_parser.py',
    'src/parsers/csv_parser.py',
    'src/parsers/transformers/transformers.py',
    'src/core/semester.py',
]


def compile_paths(file_paths):
    """Compile each file in *file_paths*; return the number of failures.

    Prints ``OK: <path>`` or ``ERROR: <path> <exception>`` per file.
    """
    errs = 0
    for p in file_paths:
        try:
            py_compile.compile(p, doraise=True)
            print('OK:', p)
        # PyCompileError for syntax errors, OSError for missing/unreadable files
        except Exception as e:
            print('ERROR:', p, e)
            errs += 1
    return errs


if __name__ == "__main__":
    # Guarded so importing this module has no side effects (and no sys.exit).
    sys.exit(compile_paths(paths))

View File

@@ -0,0 +1,35 @@
# Requires PowerShell 5+
# Scans all .ui files under src/ and runs pyside6-lupdate to generate/update .ts files next to them.
# Usage: Run from repository root: `pwsh dev/update_translations.ps1` or `powershell -ExecutionPolicy Bypass -File dev/update_translations.ps1`

$ErrorActionPreference = 'Stop'

# Resolve everything relative to the current working directory (repo root).
$repoRoot = Get-Location
$lupdate = Join-Path $repoRoot '.venv\Scripts\pyside6-lupdate.exe'
if (-not (Test-Path $lupdate)) {
    Write-Error "Qt for Python lupdate not found at '$lupdate'. Ensure venv is created and PySide6 tools installed."
}

# Collect every Qt Designer file below src/.
$uiFiles = Get-ChildItem -Path (Join-Path $repoRoot 'src') -Filter '*.ui' -Recurse -File
if ($uiFiles.Count -eq 0) {
    Write-Host 'No .ui files found under src/. Nothing to update.'
    exit 0
}

foreach ($uiFile in $uiFiles) {
    # The .ts file lives next to its .ui source.
    $tsPath = [System.IO.Path]::ChangeExtension($uiFile.FullName, '.ts')
    $tsDir = Split-Path -Parent $tsPath
    if (-not (Test-Path $tsDir)) {
        New-Item -ItemType Directory -Path $tsDir | Out-Null
    }
    # Absolute paths avoid any path-resolution surprises in lupdate.
    $uiAbs = $uiFile.FullName
    $tsAbs = $tsPath
    Write-Host "Updating translations: $uiAbs -> $tsAbs"
    & $lupdate $uiAbs '-ts' $tsAbs
}

Write-Host 'Translation update completed.'

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Scans all .ui files under src/ and runs pyside6-lupdate to generate/update .ts files next to them.
# Usage: Run from repository root: `bash dev/update_translations.sh`
set -euo pipefail

# Change into the repository root (this script lives in dev/).
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$(dirname "$script_dir")"

LUPDATE=".venv/bin/pyside6-lupdate"
if [[ ! -x "$LUPDATE" ]]; then
  echo "Qt for Python lupdate not found at '$LUPDATE'. Ensure venv is created and PySide6 tools installed." >&2
  exit 1
fi

shopt -s nullglob

# Gather all Qt Designer files below src/.
readarray -t ui_files < <(find src -type f -name '*.ui')
if (( ${#ui_files[@]} == 0 )); then
  echo "No .ui files found under src/. Nothing to update."
  exit 0
fi

for ui_path in "${ui_files[@]}"; do
  # The .ts file sits next to its .ui source.
  ts_path="${ui_path%.ui}.ts"
  echo "Updating translations: $ui_path -> $ts_path"
  "$LUPDATE" "$ui_path" -ts "$ts_path"
done

echo "Translation update completed."

View File

@@ -1,25 +0,0 @@
# Adminbereich
Der Adminbereich ist nur freigeschaltet, wenn der angemeldete Nutzer die Rolle admin hat. Hier können neue Nutzer angelegt, bestehende Nutzer bearbeitet oder gelöscht werden. Zusätzlich können die Daten der ProfessorInnen bearbeitet werden.
Die verschiedenen Aktionen können über das Dropdown-Menü ausgewählt werden.
## Nutzer anlegen
![Nutzer anlegen](images/admin_create_user.png)
Hier kann ein neuer Nutzer angelegt werden. Dazu müssen der Nutzername, das Passwort und die Rolle angegeben werden. Die Rolle kann frei vergeben oder aus dem Dropdown ausgewählt werden.
Über den Knopf **Anlegen** wird der Nutzer angelegt.
## Nutzer bearbeiten
![Nutzer bearbeiten](images/admin_edit_user.png)
Hier können die verschiedenen Nutzer bearbeitet oder gelöscht werden. Hat der ausgewählte Nutzer die Rolle admin, so kann dieser nicht gelöscht werden.
Um einen Nutzer zu löschen, muss sowohl ein Haken bei **Löschen** gesetzt werden, als auch der Knopf **Löschen** gedrückt werden.
## Lehrperson bearbeiten
![Lehrperson bearbeiten](images/admin_edit_prof.png)
Hier können die Daten der Lehrperson bearbeitet werden, oder die Lehrperson gelöscht werden. Um eine Lehrperson zu löschen, darf kein Aktiver Apparat vorhanden sein, sowie keine ELSA Aufträge vorhanden sein.
Um eine Lehrperson zu bearbeiten, muss der Name im Dropdown bei "Alte Angaben" ausgewählt werden. Die Alten Daten werden nun in der Maske angezeigt. Die neuen Daten können nun im unteren Bereich eingegeben werden. Über den Knopf **Aktualisieren** werden die Daten gespeichert.

1
docs/admin/uebersicht.md Normal file
View File

@@ -0,0 +1 @@
# Übersicht

View File

@@ -0,0 +1,118 @@
# Hauptoberfläche
![Hauptoberfläche](../images/mainUI.png){ loading=lazy }
Die Hauptoberfläche des SemesterapparatsManager besteht aus drei Hauptbereichen, die über Tabs erreichbar sind:
## Navigation
<div class="grid cards" markdown>
- :lucide-book-plus:{ .lg .middle } **Anlegen**
---
Neue Semesterapparate erstellen, bestehende aktualisieren oder löschen.
[:octicons-arrow-right-24: Zum Anlegen](../semesterapparat/anlegen.md)
- :lucide-search:{ .lg .middle } **Suchen/Statistik**
---
Semesterapparate suchen, filtern und Statistiken einsehen.
[:octicons-arrow-right-24: Zur Suche](../semesterapparat/suche.md)
- :lucide-file-text:{ .lg .middle } **ELSA**
---
Elektronische Semesterapparate anlegen und Zitate erstellen.
[:octicons-arrow-right-24: Zu ELSA](../elsa/anlegen.md)
- :octicons-person-24:{ .lg .middle } **Admin**
---
Adminbereich (nur wenn der angemeldete Nutzer die `Admin` Rolle hat)
[:octicons-arrow-right-24: Zur Übersicht](../admin/uebersicht.md)
</div>
---
## Übersichtstabelle
![Übersichtstabelle](../images/main_overview.png){ loading=lazy }
In diesem Bereich werden alle erstellten Semesterapparate angezeigt.
!!! tip "Tipp: Doppelklick"
Über einen **Doppelklick** auf einen Apparat werden alle Details geladen und in den Apparatdetails angezeigt.
### Verfügbare Aktionen
| Knopf | Funktion |
|-------|----------|
| :lucide-printer: **Übersicht erstellen** | Erstellt eine druckbare Übersicht der angezeigten Apparate für das Regal |
| :lucide-plus: **neu. App anlegen** | Schaltet die Apparatdetails frei für einen neuen Apparat |
| :lucide-x: **Auswahl abbrechen** | Entfernt alle Daten und deaktiviert die Apparatdetails |
!!! note "Hinweis: Übersicht drucken"
Die Übersicht wird per Mail an den konfigurierten Drucker geschickt. Vor dem Drucken erfolgt eine Bestätigungsabfrage.
---
## Einstellungen
Die Einstellungen erreichen Sie über das Menü oder das :lucide-settings: Icon.
![Einstellungen](../images/settings.png){ loading=lazy }
!!! info "Automatisches Wiederherstellen"
Die zuletzt geöffnete Seite wird automatisch beim nächsten Start geöffnet.
### Datenbank
Hier sind alle Informationen zur Datenbank sowie den temporären Daten hinterlegt.
!!! warning "Mehrere Nutzer"
Sollte die Anwendung von mehreren Nutzern benutzt werden, sollte der Datenbankpfad nur in Absprache geändert werden. Ansonsten kann es zu Synchronisationsproblemen kommen.
### Zotero
![Zotero Einstellungen](../images/settings_zotero.png){ loading=lazy }
Konfigurieren Sie hier die Zugangsdaten für Zotero, die für die [ELSA-Zitate](../elsa/zitieren.md) benötigt werden.
### E-Mail
![E-Mail Einstellungen](../images/settings_mail.png){ loading=lazy }
=== "Allgemein"
Zugangsdaten für den SMTP-Mailversand. Diese werden für Benachrichtigungen an Dozenten benötigt.
=== "Signatur"
Die Signatur wird automatisch an jede ausgehende Mail angehängt.
### Icons
![Icon Einstellungen](../images/settings_icons.png){ loading=lazy }
Übersicht der aktuellen Icons und verfügbaren Farbschemata.
---
## Speichern der Einstellungen
Über den Knopf **Ok** werden die Einstellungen gespeichert.
!!! success "Sofortige Übernahme"
Die meisten Einstellungen werden sofort übernommen. Sollte ein Neustart erforderlich sein, werden Sie darüber informiert.

31
docs/allgemein/index.md Normal file
View File

@@ -0,0 +1,31 @@
# Allgemein
Willkommen in der Dokumentation des **SemesterapparatsManager**! In diesem Abschnitt finden Sie alle grundlegenden Informationen zum Programm.
<div class="grid cards" markdown>
- :lucide-info:{ .lg .middle } **Info**
---
Erfahren Sie mehr über den SemesterapparatsManager und seine Funktionen.
[:octicons-arrow-right-24: Mehr erfahren](info.md)
- :lucide-download:{ .lg .middle } **Installation**
---
Installieren Sie den SemesterapparatsManager in wenigen Schritten.
[:octicons-arrow-right-24: Zur Installation](installation.md)
- :lucide-layout-dashboard:{ .lg .middle } **Hauptoberfläche**
---
Lernen Sie die Benutzeroberfläche des Programms kennen.
[:octicons-arrow-right-24: Zur Übersicht](hauptoberflaeche.md)
</div>

57
docs/allgemein/info.md Normal file
View File

@@ -0,0 +1,57 @@
# Info
## Über den SemesterapparatsManager
Der **SemesterapparatsManager** ist ein grafisches Werkzeug zur Verwaltung von Semesterapparaten an der Pädagogischen Hochschule Freiburg.
!!! abstract "Was ist ein Semesterapparat?"
Ein Semesterapparat ist eine Sammlung von Literatur, die von Dozenten für ihre Lehrveranstaltungen zusammengestellt wird. Die Bücher werden in der Bibliothek bereitgestellt und können von Studierenden eingesehen werden.
## Funktionen
Die Anwendung ermöglicht eine benutzerfreundliche Verwaltung von physischen und digitalen Semesterapparaten:
<div class="grid cards" markdown>
- :lucide-book-plus:{ .lg .middle } **Anlegen**
---
Erstellen Sie neue Semesterapparate mit allen notwendigen Informationen zu Dozenten, Fächern und Literatur.
- :lucide-search:{ .lg .middle } **Suchen & Statistik**
---
Durchsuchen Sie bestehende Apparate und erhalten Sie statistische Auswertungen.
- :lucide-file-text:{ .lg .middle } **ELSA**
---
Verwalten Sie elektronische Semesterapparate (ELSA) mit automatischer Zitat-Erstellung via Zotero.
- :lucide-mail:{ .lg .middle } **Kommunikation**
---
Versenden Sie automatisierte E-Mails an Dozenten bei Erstellung oder Löschung von Apparaten.
</div>
## Technische Details
| Eigenschaft | Wert |
|-------------|------|
| **Programmiersprache** | Python 3.10+ |
| **GUI-Framework** | PySide6 (Qt) |
| **Datenbank** | SQLite |
| **Zitat-System** | Zotero Integration |
| **Stil** | DGPs (Deutsche Gesellschaft für Psychologie) |
## Entwicklung
Der SemesterapparatsManager wurde entwickelt von **Alexander Kirchner** für die Pädagogische Hochschule Freiburg.
!!! info "Open Source"
Der Quellcode ist auf einer privaten [Gitea](https://about.gitea.com/) Instanz und kann bei Bedarf eingesehen werden.

View File

@@ -0,0 +1,92 @@
# Installation
## Voraussetzungen
Bevor Sie den SemesterapparatsManager installieren können, stellen Sie sicher, dass folgende Voraussetzungen erfüllt sind:
- [x] Windows 10/11 oder höher
- [x] Internetzugang für Katalog-Abfragen
- [x] Optional: Zotero-Account für ELSA-Funktionen
!!! warning "Buchmetadaten"
Die Metadaten für Bücher können aktuell nur aus dem Hochschulnetz geladen werden, da diese auf ein internes Format zugreifen, welches nur im Hochschulnetz angezeigt wird.
## Installation
### Für Endanwender
=== "Portable Version"
1. Laden Sie die neueste Version von der Release-Seite herunter
2. Entpacken Sie die ZIP-Datei in einen Ordner Ihrer Wahl
3. Starten Sie `SemesterapparatsManager.exe`
=== "Installer"
1. Laden Sie den Installer herunter
2. Führen Sie die Setup-Datei aus
3. Folgen Sie den Anweisungen des Installationsassistenten
4. Starten Sie das Programm über das Startmenü
### Für Entwickler
!!! note "Entwicklerinstallation"
Diese Anleitung ist für Entwickler gedacht, die den Quellcode bearbeiten möchten.
#### Mit UV (empfohlen)
```bash
# Repository klonen
git clone https://github.com/IHR-REPO/SemesterapparatsManager.git
cd SemesterapparatsManager
# Virtuelle Umgebung erstellen und Abhängigkeiten installieren
uv sync
# Anwendung starten
uv run python main.py
```
#### Mit pip
```bash
# Repository klonen
git clone https://github.com/IHR-REPO/SemesterapparatsManager.git
cd SemesterapparatsManager
# Virtuelle Umgebung erstellen
python -m venv .venv
.venv\Scripts\activate
# Abhängigkeiten installieren
pip install -e ".[dev]"
# Anwendung starten
python main.py
```
## Erster Start
Beim ersten Start werden Sie aufgefordert, sich anzumelden:
![Login](../images/login.png){ loading=lazy }
!!! tip "Tipp"
Ihre Anmeldedaten werden verschlüsselt gespeichert. Sollten Sie Ihr Passwort vergessen, wenden Sie sich bitte an einen Nutzer mit Adminberechtigungen, um das Passwort zu ändern. Bei Fragen zur Einrichtung wenden Sie sich an den Administrator.
Sofern Sie keine eigenen Zugangsdaten bei der Einrichtung eingegeben haben, sind dies die Standardanmeldedaten:
**Username**: admin
**Password**: admin
## Konfiguration
Nach der Installation sollten Sie die Einstellungen überprüfen:
1. Öffnen Sie das Programm
2. Gehen Sie zu **Einstellungen** (über das Menü oder :lucide-settings:)
3. Konfigurieren Sie:
- **Datenbank-Pfad**: Speicherort der SQLite-Datenbank
- **Zotero**: API-Schlüssel für Zitat-Funktionen
- **E-Mail**: SMTP-Einstellungen für Benachrichtigungen
Weitere Informationen zur Konfiguration finden Sie unter [Hauptoberfläche](hauptoberflaeche.md).

View File

@@ -1,30 +0,0 @@
# Zitieren
## Oberfläche
![Zitieroberfläche](images/generate_quote_emtpy.png)
Die [ELSA](elsa.md) Oberfläche bietet die Möglichkeit, für Einträge automatisch Zitate zu erstellen. Hierfür wird der Stil `Deutsche Gesellschaft für Psychologie (DGPs)` verwendet.
Um ein Zitat zu erstellen, muss zuerst ein Eintrag in der Tabelle ausgewählt werden. Über den Knopf **Eintrag zitieren** wird ein Dialog geöffnet, in dem der Eintrag zitiert werden kann.
Sollte ein Eintrag mehrere Abschnitte beinhalten, muss nach der automatischen Suche die Seitenzahl angepasst werden. Ist die Seitenzahl in der Tabelle nur für einen Abschnitt, so wird diese automatisch übernommen.
## Zitierdialog
![Zitierdialog mit Ergebnis](images/quote_search_result.png)
Nachdem auf den Knopf **Eintrag zitieren** geklickt wird, wird automatisch der Katalog angefragt und relevante Daten werden in die Felder eingetragen. Ist die Seitenzahl in der Tabelle nur für einen Abschnitt, so wird diese automatisch übernommen.
!!! info "Erläuterung der Knöpfe"
- **Suchen** Sucht im Katalog nach dem eingegebenen Identifikator
- **Zitat erstellen** Stellt eine Verbindung zu Zotero her, erstellt ein entsprechendes Werk und erhält die Zitate als Ergebnis; wechselt zur Oberfläche der Zitate
- **Ok** Schließt den Dialog
- **Discard** Entfernt alle Eingaben
- **Abbrechen** Schließt den Dialog
- **Wiederholen** Geht zu den Eingabefeldern zurück, ermöglicht eine erneute Suche mit geänderten Eingaben
## Zitate
![Zitate](images/quote_generated.png)
Über den Knopf **Zitat erstellen** wird eine Verbindung zu Zotero hergestellt und ein entsprechendes Werk erstellt. Die Zitate werden als Ergebnis angezeigt. Der Dialog wechselt automatisch zur Oberfläche der Zitate.

View File

@@ -1,42 +0,0 @@
# Einstellungen
![Einstellungen](images/settings.png)
In den Einstellungen werden alle Informationen angezeigt, die in der config.yaml Datei hinterlegt sind. Diese Datei wird beim Start der Anwendung eingelesen und als globale Klasse `Config` gespeichert. Dadurch können Einstellungen sofort geändert werden und das Programm muss nicht für jede Änderung neu gestartet werden.
!!! Info
Die zuletzt geöffnete Seite wird automatisch beim nächsten Start geöffnet.
## Seiten
### Datenbank
Hier sind alle Informationen zur Datenbank sowie zu den temporären Daten hinterlegt.
Der Speicherort der Datenbank kann über den Knopf `...` neben dem Datenbanknamen geändert werden. Der Datenbankpfad passt sich automatisch an.
!!! Warning "Hinweis - Mehrere Nutzer"
Sollte die Anwendung von mehreren Nutzern benutzt werden, sollte dieser Pfad nur in Absprache geändert werden. Ansonsten kann es zu Problemen kommen.
### Zotero
![Zotero](images/settings_zotero.png)
In diesem Bereich können die Zugangsdaten für Zotero hinterlegt werden. Diese werden benötigt, um die Zitate für [ELSA](elsa.md#einträge-zitieren) zu erstellen.
### e-Mail
![e-Mail](images/settings_mail.png)
Dieser Bereich ist zweigeteilt: einmal der allgemeine Teil und einmal der Teil für die Mailsignatur.
#### Allgemein
Hier können die Zugangsdaten für den Mailversand hinterlegt werden. Diese werden benötigt, um die Nachrichten an die ProfessorInnen zu versenden. Mehr Infos: [Mailversand](mail.md)
#### Signatur
Hier kann die Signatur für die Mails hinterlegt werden. Diese wird automatisch an jede Mail angehängt.
### Icons
![Icons](images/settings_icons.png)
Hier werden sowohl die aktuellen Icons, wie auch die verfügbaren Farben angezeigt.
## Speichern
Über den Knopf **Ok** werden die Einstellungen gespeichert. Sollte ein Neustart der Anwendung erforderlich sein, wird darüber informiert. Ansonsten werden die Einstellungen sofort übernommen.

View File

@@ -1,90 +0,0 @@
# Semesterapparat anlegen
Um einen neuen Semesterapparat anzulegen, muss zuerst der Knopf "neu. App anlegen" gedrückt werden. Das Feld der Apparatdetails wird nun zum Bearbeiten entsperrt, und die Daten können in die Felder eingetragen werden.
![Anlegen](images/main_apparatdetails.png)
## Apparatdetails
### Apparat anlegen
Um einen Apparat erfolgreich anzulegen, müssen alle Felder, die ein Sternchen (*) haben, ausgefüllt werden. Ist eine Eingabe nicht valide, wird neben der Eingabe ein rotes X angezeigt (siehe Bild).
Folgende Felder haben spezielle Formatierungsanforderungen:
- Prof. Name: Der Name des Professors muss in der Form "Nachname, Vorname" eingegeben werden.
- Mail: Die Mailadresse muss in der Form "irgend@etwas.xy" eingegeben werden.
- Tel: Die Telefonnummer muss mindestens 3 Ziffern enthalten.
- Semester: Das Semester wird wie folgt angegeben:
- Wintersemester: Checkbox Winter + aktives Jahr wenn Monat zwischen Oktober und Dezember; ansonsten aktives Jahr - 1
- Sommersemester: Checkbox Sommer + aktives Jahr wenn Monat zwischen März und August
Beim Versuch, den Apparat zu speichern, bevor alle Felder korrekt ausgefüllt sind, wird eine Fehlermeldung angezeigt, die auf die fehlerhaften Felder hinweist. Nachdem alle Felder korrekt ausgefüllt sind, kann der Apparat gespeichert werden.
![Fehler](images/main_apparatdetails_error.png)
Über einen Klick auf Ok oder Cancel wird die Meldung geschlossen und der Apparat kann weiter bearbeitet werden.
### Dokumente hinzufügen
Dem Apparat können Dokumente hinzugefügt werden. Besonders hilfreich ist das Hinzufügen der Antragsformulare, da der SemesterapparatsManager diese Datei lesen und die Bücher automatisch dem Apparat hinzufügen kann.
Dokumente werden über den Knopf "Dokumente hinzufügen" hinzugefügt. Es öffnet sich ein Auswahldialog, bei dem Sie die Datei(en) auswählen können, die Sie hinzufügen möchten.
Handelt es sich bei der Datei um den Antrag, so kann dieser mit dem Knopf "Medien aus Dokument hinzufügen" ausgelesen werden.
!!! Warning "ZU BEACHTEN"
Wird dieser Knopf gedrückt, wird der Apparat, wenn möglich, gespeichert und angelegt. Dies ist notwendig, da die Medien nur dann dem Apparat hinzugefügt werden können, wenn dieser bereits in der Datenbank existiert.
Die erkannten Medien werden nun hinzugefügt. Über den Bereich "Medienliste" kann der Fortschritt eingesehen werden. Solange noch Medien hinzugefügt werden, ist es nicht möglich, den Apparat zu bearbeiten, die Auswahl zu beenden oder einen anderen Apparat auszuwählen.
### Apparat speichern
Nachdem alle Felder korrekt ausgefüllt sind, kann der Apparat gespeichert werden. Dazu muss der Knopf "Speichern" gedrückt werden. Der Apparat wird nun in der Datenbank gespeichert und wird in der Tabelle angezeigt. Wurde vor dem Speichern der Haken "Mail senden" gesetzt, öffnet sich ein Fenster, in dem eine Mail, basierend auf einem Template, an den Professor gesendet werden kann. (Erfordert Mail Zugangsdaten [siehe Konfiguration](config.md#email))
## Medienliste
In der Medienliste werden alle Medien angezeigt, die dem Apparat hinzugefügt wurden. Hier können die Medien bearbeitet, gelöscht oder hinzugefügt werden.
Wurde ein Apparat ausgewählt, werden einige Felder unterhalb der Medienliste angezeigt:
![Optionen der Medienliste](images/medialist_options.png)
Standardmäßig werden nur Medien angezeigt, die nicht aus dem Apparat entfernt wurden. Über die Checkbox "gel. Medien anzeigen" werden auch gelöschte Medien angezeigt.
Der Knopf "im Apparat?" kann für ein einzelnes, oder mehrere Medien verwendet werden, um zu prüfen, ob die ausgewählten Medien inzwischen dem Apparat hinzugefügt wurden.
Unter der Liste befindet sich ein Knopf "Medien hinzufügen", der es ermöglicht, Medien manuell hinzuzufügen. Hierbei wird ein Dialog geöffnet, in dem die Signaturen der Medien eingetragen werden können. Die Medien werden dann dem Apparat hinzugefügt.
### Kontextmenü
Mit einem Rechtsklick auf ein Medium wird ein Kontextmenü geöffnet, das folgende Optionen enthält (Mehrfachauswahl der Medien mit Strg + Linksklick möglich):
![Kontextmenü](images/media_context_menu_root.png)
#### Subbereich Allgemeines
![Kontextmenü](images/media_context_menu_general.png)
- Bearbeiten: Öffnet ein Fenster, in dem die Metadaten des Mediums eingesehen bzw. bearbeitet werden können. (s. [Metadaten bearbeiten](edit_media.md))
- Löschen: Löscht das Medium aus dem Apparat. Das Medium wird nicht gelöscht, sondern nur aus dem Apparat entfernt. (s. [Bild](images.md#Exemplar löschen))
#### Subbereich Apparate
![Kontextmenü](images/media_context_menu_apparats.png)
- Zum Apparat hinzufügen: *Noch nicht implementiert* (derzeit deaktiviert) Fügt das Medium dem Apparat in aDIS hinzu
- In Apparat verschieben: Öffnet ein Fenster, in dem ein anderer Apparat ausgewählt werden kann, in den die ausgewählten Medien verschoben werden sollen.
- In Apparat kopieren: Öffnet ein Fenster, in dem ein anderer Apparat ausgewählt werden kann, in den die ausgewählten Medien kopiert werden sollen.
## Medien hinzufügen
![Medien hinzufügen](images/add_media.png)
Um Medien hinzuzufügen, müssen die Signaturen der Medien in das Textfeld eingetragen werden. Jede Signatur muss in die Zeile eingegeben werden und mit Enter bestätigt werden.
Nachdem alle Signaturen hinzugefügt wurden, können folgende Optionen gesetzt werden:
- Modus: Gibt an, welche Metadaten verwendet werden. Die beiliegende Tabelle gibt an, welche Metadaten welche Angaben enthalten.
- Jedes Buch verwenden: Diese Option ermöglicht es, Medien hinzuzufügen, die noch nicht im Apparat sind
- Exakte Signatur: Diese Option teilt dem System mit, dass genau diese Signatur verwendet werden muss. Ist diese Option nicht gesetzt, wird nach der Signatur gesucht, die am ehesten der eingegebenen Signatur entspricht. (Das gefundene Buch ist das gleiche, nur evtl. ein anderes Exemplar)
Mit dem Knopf "Ok" werden die Medien gesucht und hinzugefügt.

View File

@@ -1,7 +0,0 @@
# Metadaten bearbeiten
![Metadaten bearbeiten](images/edit_book_metadata.png)
In diesem Fenster können die Metadaten eines Mediums bearbeitet werden. Diese Bearbeitung ist sinnvoll, wenn Angaben nicht direkt aus dem Katalog übernommen werden konnten, oder wenn die Angaben nicht korrekt sind.
Über den Knopf "Ok" werden die geänderten Metadaten gespeichert und das Fenster geschlossen. Über den Knopf "Abbrechen" werden die Änderungen verworfen und das Fenster geschlossen.

View File

@@ -1,38 +0,0 @@
# ELSA
![ELSA](images/elsa_main.png)
## ELSA anlegen
Um einen ELSA zu erstellen, muss der Knopf **Neuer Auftrag** gedrückt werden. Das Feld *Auftragsdaten* wird zum Bearbeiten freigeschaltet, der Fokus wird automatisch auf das Feld **Prof.** gesetzt.
Hier werden automatisch alle bereits vorhandenen Professoren der Semesterapparate eingetragen. Sollte der Professor noch keinen Apparat haben, so kann der Name manuell eingetragen werden. Es wird nun ein neues Element angezeigt:
![Kontaktdaten](images/new_prof_elsa_fields.png)
Solange diese Felder nicht ausgefüllt sind, kann der Auftrag nicht gespeichert werden.
Um den ELSA zu speichern, müssen alle Felder ausgefüllt sein. Nach dem Speichern wird der ELSA in der Tabelle angezeigt. Über das Icon ![Icon](images/icon_cal.png) können der aktuelle Tag und das aktuelle Semester eingetragen werden.
### Dokumente hinzufügen
![Dokumenttabelle](images/elsa_docs.png)
Hat der Professor ein passendes Formular geliefert, so kann dieses über den Knopf **Dokument hinzufügen** hinzugefügt werden. Das Dokument wird in der Datenbank gespeichert und kann über den Knopf **Dokument öffnen** geöffnet werden. Über den Knopf **Medien aus Dokument hinzufügen** werden alle erkannten Medien aus dem Dokument in die Tabelle eingetragen und können zitiert werden.
Sollte der Professor mehrere Segmente aus einem Medium in einer Zeile gelistet haben, so können diese als separate Einträge hinzugefügt werden. Dazu muss ein Haken bei **Abschnitte trennen** gesetzt sein.
!!! Warn "Hinweis: Datenformat im Dokument"
Um die Abschnitte erfolgreich zu trennen, müssen diese durch ein Semikolon getrennt sein.
Beispiel: `1-5; 18-25; 30-35`
Durch den Klick auf den Knopf **Medien aus Dokument hinzufügen** wird der Auftrag automatisch gespeichert, die Medien bzw. Abschnitte werden in der Tabelle angezeigt.
### Einträge zitieren
Da alle gescannten Dokumente später auf Illias hochgeladen werden, gibt es die Funktion **Eintrag zitieren**. Für diese Funktion muss ein Eintrag in der Tabelle ausgewählt werden. Über den Knopf **Eintrag zitieren** wird ein Dialog geöffnet, in dem der Eintrag zitiert werden kann. Die angegebene Seitenzahl wird automatisch übernommen.
Genauere Beschreibung: [Zitieren](citing.md)

86
docs/elsa/anlegen.md Normal file
View File

@@ -0,0 +1,86 @@
# ELSA anlegen
![ELSA Hauptoberfläche](../images/elsa_main.png){ loading=lazy }
---
## Neuen ELSA erstellen
### Schritte
1. Klicken Sie auf **Neuer Auftrag**
2. Das Feld *Auftragsdaten* wird freigeschaltet
3. Der Fokus wechselt automatisch auf das Feld **Prof.**
### Professorenauswahl
Im Feld **Prof.** werden automatisch alle bereits vorhandenen Dozenten aus den Semesterapparaten angezeigt.
!!! info "Neuer Professor"
Wenn der Professor noch keinen Apparat hat, kann der Name manuell eingetragen werden. Es erscheinen zusätzliche Felder:
![Kontaktdaten für neuen Professor](../images/new_prof_elsa_fields.png){ loading=lazy }
| Feld | Beschreibung | Pflicht |
|------|--------------|---------|
| **Name** | Name des Dozenten | :material-check: |
| **E-Mail** | Kontakt-E-Mail | :material-check: |
| **Telefon** | Telefonnummer | :material-check: |
!!! warning "Pflichtfelder"
Solange diese Felder nicht ausgefüllt sind, kann der Auftrag **nicht** gespeichert werden.
---
## Schnelleingabe
Über das Kalender-Icon :octicons-calendar-24: können automatisch eingetragen werden:
- [x] Aktuelles Datum
- [x] Aktuelles Semester
---
## Dokumente hinzufügen
![Dokumenttabelle](../images/elsa_docs.png){ loading=lazy }
### Formular hinzufügen
1. Klicken Sie auf **Dokument hinzufügen**
2. Wählen Sie die Datei aus
3. Das Dokument wird in der Datenbank gespeichert
### Medien extrahieren
Über **Medien aus Dokument hinzufügen** werden alle erkannten Medien automatisch in die Tabelle eingetragen.
!!! tip "Abschnitte trennen"
Hat der Professor mehrere Abschnitte in einer Zeile gelistet, aktivieren Sie **Abschnitte trennen** – diese werden dann als separate Einträge hinzugefügt.
!!! warning "Formatierung im Dokument"
Die Abschnitte müssen durch **Semikolon** getrennt sein:
```
1-5; 18-25; 30-35
```
### Automatisches Speichern
Beim Klicken auf **Medien aus Dokument hinzufügen** wird der Auftrag automatisch gespeichert.
---
## Einträge bearbeiten
Die erkannten Medien erscheinen in der Tabelle und können:
- :lucide-edit: Bearbeitet werden
- :lucide-quote: Zitiert werden → [Zum Zitieren](zitieren.md)
- :lucide-trash: Gelöscht werden
---
## Dokument öffnen
Über **Dokument öffnen** kann das hinzugefügte Formular jederzeit eingesehen werden. Es wird hierfür eine temporäre Datei erstellt und im entsprechenden Programm (Bspw: Word, Excel, Email) geöffnet.

46
docs/elsa/index.md Normal file
View File

@@ -0,0 +1,46 @@
# ELSA
**ELSA** steht für **E**lektronischer **L**ehr-**S**emester**a**pparat und ermöglicht die Verwaltung digitaler Literatur für Lehrveranstaltungen.
<div class="grid cards" markdown>
- :lucide-file-plus:{ .lg .middle } **ELSA anlegen**
---
Erstellen Sie einen neuen elektronischen Semesterapparat.
[:octicons-arrow-right-24: ELSA anlegen](anlegen.md)
- :lucide-quote:{ .lg .middle } **Zitieren**
---
Erstellen Sie automatische Zitate für Ihre digitalen Medien.
[:octicons-arrow-right-24: Zum Zitieren](zitieren.md)
</div>
---
## Was ist ELSA?
!!! abstract "Definition"
Ein elektronischer Semesterapparat (ELSA) ist eine digitale Sammlung von Literatur, die auf Illias bereitgestellt wird. Die Dokumente werden gescannt und mit korrekten Zitationen versehen.
### Workflow
```mermaid
graph TD
A[ELSA anlegen] --> B[Dokumente hinzufügen]
B --> C[Medien aus Dokument extrahieren]
C --> D[Einträge zitieren]
D --> E[Dateien auf Illias hochladen]
```
### Voraussetzungen
- [x] Konfigurierter Zotero-Account
- [x] Eingescannte Dokumente oder Formulare
- [x] Informationen zum Dozenten und zur Veranstaltung

95
docs/elsa/zitieren.md Normal file
View File

@@ -0,0 +1,95 @@
# Zitieren
Da alle gescannten Dokumente später auf **Illias** hochgeladen werden, bietet der SemesterapparatsManager eine automatische Zitierfunktion.
---
## Zitierstil
!!! abstract "Verwendeter Stil"
Es wird der Zitierstil der **Deutschen Gesellschaft für Psychologie (DGPs)** verwendet.
---
## Oberfläche
![Zitieroberfläche](../images/generate_quote_emtpy.png){ loading=lazy }
### Zitat erstellen
1. Wählen Sie einen Eintrag in der ELSA-Tabelle aus
2. Klicken Sie auf **Eintrag zitieren**
3. Der Dialog öffnet sich und sucht automatisch im Katalog
!!! tip "Seitenzahlen"
Die angegebene Seitenzahl aus der Tabelle wird automatisch übernommen. Bei mehreren Abschnitten muss die Seitenzahl ggf. angepasst werden.
---
## Zitierdialog
![Zitierdialog mit Ergebnis](../images/quote_search_result.png){ loading=lazy }
Nach dem Öffnen werden automatisch relevante Daten aus dem Katalog abgefragt und in die Felder eingetragen.
### Aktionen
| Knopf | Funktion |
|-------|----------|
| :lucide-search: **Suchen** | Sucht im Katalog nach dem Identifikator |
| :lucide-quote: **Zitat erstellen** | Verbindet mit Zotero und erstellt das Zitat |
| :lucide-check: **Ok** | Schließt den Dialog |
| :lucide-trash: **Discard** | Entfernt alle Eingaben |
| :lucide-x: **Abbrechen** | Schließt den Dialog |
| :lucide-rotate-ccw: **Wiederholen** | Zurück zur Eingabe für erneute Suche |
---
## Zitat-Erstellung
```mermaid
sequenceDiagram
participant U as Benutzer
participant S as SemesterapparatsManager
participant Z as Zotero
U->>S: Klick "Zitat erstellen"
S->>Z: Werk anlegen
Z-->>S: Werk-ID
S->>Z: Zitat abrufen
Z-->>S: Formatiertes Zitat
S->>U: Zitat anzeigen
```
### Ablauf
1. Klicken Sie auf **Zitat erstellen**
2. Eine Verbindung zu Zotero wird hergestellt
3. Das Werk wird in Ihrer Zotero-Bibliothek angelegt
4. Das formatierte Zitat wird zurückgegeben
---
## Generierte Zitate
![Generierte Zitate](../images/quote_generated.png){ loading=lazy }
Nach erfolgreicher Erstellung wechselt der Dialog automatisch zur Zitat-Ansicht.
### Verwendung
Die generierten Zitate können:
- :lucide-copy: In die Zwischenablage kopiert werden
- :lucide-file-text: Als Dateiname für das gescannte Dokument verwendet werden
- :lucide-upload: Zusammen mit dem Dokument auf Illias hochgeladen werden
---
## Voraussetzungen
!!! warning "Zotero-Konfiguration erforderlich"
Für die Zitierfunktion muss Zotero konfiguriert sein:
1. Erstellen Sie einen API-Key in Ihrem Zotero-Account
2. Tragen Sie den Key in den [Einstellungen](../allgemein/hauptoberflaeche.md#zotero) ein

View File

@@ -1,12 +0,0 @@
# Verlängerung
Ein Dialog zum Verlängern eines Apparates.
![Verlängerung](images/extend.png)
Zum Verlängern muss ein Semester ausgewählt, und ein Jahr eingetragen sein. Die Checkbox für den Dauerapparat kann angekreuzt werden, um den Apparat als Dauerapparat zu markieren.
!!! Info "Info Dauerapparat"
Damit der Apparat als Dauerapparat verlängert werden kann, muss ein Semester angegeben werden.
Nach dem Speichern wird das Semester automatisch angepasst und in den entsprechenden Tabellen angezeigt.

View File

@@ -1,14 +0,0 @@
# Bilder
## Admin Aktionen
![Bild](images/actions.png)
## Apparatscheckliste
![Bild](images/checklist.png)
## Medien hinzufügen
![Bild](images/add_media.png)
## Kalendar
![Bild](images/calendar.png)

View File

@@ -1,3 +0,0 @@
# ToC
>TBD

View File

@@ -1,47 +0,0 @@
# Mails
Der SemesterapparatsManager hat die Option, den Dozenten beim Erstellen eines Apparates eine Mail zu schicken. Diese Mail enthält die Information, dass der Apparat erstellt wurde und wie er aufgerufen werden kann.
Zusätzlich kann beim Löschen eines Apparates eine Mail an den Dozenten geschickt werden. Diese Mail enthält die Information, dass der Apparat gelöscht wurde.
Über eine neue Oberfläche können neue Mail Templates erstellt werden.
## Mail
Abhängig davon, ob ein Apparat erstellt oder gelöscht wird/wurde, wird automatisch ein Template geladen. Somit muss nur noch die Anrede und der Text angepasst werden.
Bsp:
![Mail](images/mail_delete.png)
!!! Info
Die Felder **eMail** und **Prof** sind schreibgeschützt und können nicht verändert werden.
Über das Dropdown Menü **Art** kann das entsprechende Template ausgewählt werden. Der Knopf rechts daneben kann für das [Erstellen](#neue-mail-templates) neuer Templates genutzt werden.
Um die Mail abschicken zu können, muss die Anrede mithilfe der Knöpfe über dem Mailtext konfiguriert werden. Die Anrede passt sich automatisch an, basierend auf der angegebenen Anrede.
Wird die Mail erfolgreich verschickt, schließt sich das Fenster automatisch, und es wird eine Kopie der Mail an die Mail semesterapparate@ph-freiburg.de geschickt.
## Neue Mail Templates
Über den Knopf rechts neben der Auswahl der Email Templates können neue Templates erstellt werden.
![MailTemplate](images/mail_create_template.png)
Diese Oberfläche bietet die Möglichkeit, ein neues Template zu erstellen. Mithilfe des Dropdowns **Platzhalter** können Platzhalter ausgewählt und eingefügt werden. Bevor das Template gespeichert werden kann, muss dieses getestet werden. Sollte der Test erfolgreich sein, kann ein Name vergeben werden, das Template wird nun in dem Dropdown angezeigt.
### Template testen
Sollten Angaben fehlen, wird eine Fehlermeldung angezeigt:
![TemplateTest](images/mail_create_missing.png)
!!! info
Die Fehlermeldung ist dynamisch und gibt immer die fehlenden Angaben an.
### Template speichern
Ist das Template fehlerfrei, wird folgender Dialog angezeigt:
![TemplateSave](images/mail_set_template_name.png)

View File

@@ -1,27 +0,0 @@
# Hauptoberfläche
![Hauptoberfläche](images/mainUI.png)
Die Hauptoberfläche des SemesterapparatsManager besteht aus drei Hauptbereichen:
- **Anlegen**: Auf dieser Seite können neue Semesterapparate angelegt werden, bestehende Apparate aktualisiert oder gelöscht werden. Weitere Informationen siehe: [Anlegen](create.md)
- **Suchen/Statistik**: Hier können Semesterapparate gesucht, gefiltert, gelöscht und verlängert werden. Zudem werden Statistiken zum Erstellen / Löschen von Semesterapparaten angezeigt. Ein zweiter Tab ermöglicht die Suche der Medien in den Semesterapparaten. Weitere Informationen siehe: [Suchen / Statistik](search.md)
- **ELSA**: Hier können ELSA Apparate angelegt werden, und entsprechende Dateinamen, Beschreibungen und Zitate erstellt werden. Weitere Informationen siehe: [ELSA](elsa.md)
## Übersichtstabelle
![Übersichtstabelle](images/main_overview.png)
In diesem Bereich werden alle erstellten Semesterapparate angezeigt. Über einen Doppelklick auf einen Apparat werden alle Details geladen und in den Apparatdetails angezeigt. Hier können dann auch einige Angaben geändert werden.
Weitere Infos siehe: [Anlegen](create.md)
### Knöpfe
Dieser Bereich hat folgende Knöpfe:
- **Übersicht erstellen**: Erstellt eine Übersicht der angezeigten Apparate, welche am Regal ausgehängt werden kann. Diese Übersicht wird per Mail an den Drucker geschickt. Vor dem Drucken erfolgt eine Bestätigungsabfrage.
- **neu. App anlegen**: Schaltet die Apparatdetails frei, um einen neuen Apparat anzulegen. Weiteres siehe: [Anlegen](create.md)
- **Auswahl abbrechen**: Entfernt alle Daten aus den Apparatsdetails und schaltet diese wieder auf inaktiv.

View File

@@ -1,106 +0,0 @@
# Suche und Statistik
![Suche und Statistik](images/main_search_stat.png)
Auf dieser Seite gibt es zwei Hauptfunktionen: die Suche und die Statistik. Standardmäßig wird die Statistik geöffnet.
## Statistikbereich
### Suche
![Statistiksuche](images/ss_stat_search.png)
In diesem Bereich kann die Suche nach Semesterapparaten durchgeführt werden. Suchoptionen sind:
- **Appnr**: Die Nummer des Semesterapparates, die Auswahl zeigt alle belegten Semesterapparate an
- **Person**: Der Name des Dozenten, der den Semesterapparat erstellt hat
- **Fach**: Das Fach des Semesterapparates
- **Erstell-** und **Endsemester**: Semester, in denen der Semesterapparat erstellt wurde, bzw. enden soll
- **Dauerapp**: Alle Apparate, die als Dauerapparat vermerkt sind
- **Löschbar**: Überschreibt alle vorhergehenden Parameter und zeigt alle Semesterapparate an, die gelöscht werden können
!!! Info
Um alle Semesterapparate anzuzeigen, kann die Suche ohne Eingabe gestartet werden.
Die Suche kann durch Klicken auf den Button **Suchen** gestartet werden. Die Ergebnisse werden in der Tabelle darunter angezeigt.
### Suchergebnisse
!!! Info
Der Ergebnisbereich kann über den vertikalen Slider verschoben werden, um mehr Platz für die Tabelle oder den Graphen zu schaffen. Hierzu mit der Maus auf den Raum zwischen den beiden Bereichen klicken und ziehen.
![Statistiksuchergebnisse](images/ss_stat_result.png)
In diesem Bereich werden die Suchergebnisse angezeigt. Für jeden gefundenen Treffer wird eine Zeile angelegt:
![Statistiksuchergebnisse](images/ss_search_entries.png)
Angezeigt werden:
- **Checkbox**
- **Apparatsname**
- **Apparatsnummer**
- **Person**
- **Fach**
!!! failure "Info: Gelöschte Apparate"
Gelöschte Apparate werden in der Tabelle mit rotem Hintergrund angezeigt. (s. Ausgewählte Löschen)
Über der Tabelle sind zwei Knöpfe: **Ausgewählte Löschen** und **Ausgewählte Benachrichtigen**
Um diese Aktionen auszuführen, muss mindestens eine Checkbox bei einem Apparat angekreuzt sein.
#### Ausgewählte Löschen
![Statistiksuchergebnisse](images/ss_stat_delete.png)
Nach dem Klicken auf den Button **Ausgewählte Löschen** wird jeder ausgewählte Apparat gelöscht. Die gelöschten Apparate werden in der Tabelle mit rotem Hintergrund angezeigt.
#### Ausgewählte Benachrichtigen
Mit dem Klick auf den Button wird ein neues Fenster geöffnet:
![Statistiksuchergebnisse](images/mail_delete.png)
Bevor die Mail abgeschickt werden kann, muss die Anrede konfiguriert werden. Weitere Infos finden Sie im Abschnitt [Mails](mail.md).
#### Kontextmenü
Diese Tabelle bietet auch ein Rechtsklickmenü mit folgenden Optionen:
- **Verlängern**: Öffnet den [Verlängerungsdialog](extend.md)
- **Wiederherstellen**: Stellt einen gelöschten Apparat wieder her
!!! Info "Info: Wiederherstellen"
Diese Option kann für einen oder mehrere gelöschte Apparate verwendet werden. Für mehrere Apparate müssen die entsprechenden Zeilen mit Strg+Klick auf die Zeilennummer markiert werden.
### Apparatsstatistik
Rechts neben der Tabelle wird die Statistik der gefundenen Apparate angezeigt:
![Statistiksuchergebnisse](images/ss_stats_table.png)
Hierbei werden die Angaben sowohl in einer Tabelle als auch in einem Diagramm dargestellt.
#### Tabelle
In der Tabelle werden alle verwendeten Semester angegeben, in denen ein Apparat entweder erstellt oder gelöscht wurde.
Über einen Doppelklick auf ein Semester werden die Apparate angezeigt, die in diesem Semester erstellt oder gelöscht wurden.
![DetailedView](images/ss_stats_detailed.png)
Ein Klick auf das `>` Symbol einer Person zeigt alle erstellten oder gelöschten Apparate der Person an. Ein Doppelklick auf den erstellten Apparat wechselt die Ansicht zur [Hauptoberfläche](mainUI.md) und zeigt alle Daten an.
!!! Info "Info: Gelöschte Apparate"
Gelöschte Apparate können nicht angezeigt werden, die Doppelklick Funktion ist hier deaktiviert.
#### Diagramm
Das Diagramm zeigt die Anzahl der erstellten und gelöschten Apparate in einem Liniendiagramm an.
![Diagramm](images/ss_stats_graph.png)
## Suchbereich
Der Suchbereich kann verwendet werden, um zu prüfen, ob ein Exemplar in einem Apparat vorhanden ist. Mögliche Suchkriterien sind:
- **Titel**: Der Titel des Exemplars (Trunkierung wird automatisch durchgeführt)
- **Signatur**: Die Signatur des Exemplars (Trunkierung wird automatisch durchgeführt)
Über den Knopf **Suchen** wird die Suche gestartet. Die Ergebnisse werden in der Tabelle darunter angezeigt.
!!! Info "Info: Exemplarsuche"
Im Vergleich zur Apparatssuche kann hier keine leere Suche durchgeführt werden, da ggf. zu viele Ergebnisse angezeigt werden können.

View File

@@ -0,0 +1,144 @@
# Semesterapparat anlegen
Um einen neuen Semesterapparat anzulegen, muss zuerst der Knopf **neu. App anlegen** gedrückt werden. Das Feld der Apparatdetails wird nun zum Bearbeiten entsperrt.
![Anlegen](../images/main_apparatdetails.png){ loading=lazy }
---
## Apparatdetails
### Pflichtfelder
Um einen Apparat erfolgreich anzulegen, müssen alle Felder mit einem **Sternchen (*)** ausgefüllt werden. Ist eine Eingabe nicht valide, wird neben der Eingabe ein :material-close-circle:{ style="color: red" } angezeigt.
!!! warning "Formatierungsanforderungen"
Folgende Felder haben spezielle Formatierungsanforderungen:
| Feld | Format | Beispiel |
|------|--------|----------|
| **Prof. Name** | Nachname, Vorname | `Müller, Hans` |
| **Mail** | Gültige E-Mail-Adresse | `mueller@ph-freiburg.de` |
| **Tel** | Mindestens 3 Ziffern | `0761-12345` |
| **Semester** | Automatisch berechnet | siehe unten |
### Semester-Logik
Das Semester wird automatisch wie folgt berechnet:
=== "Wintersemester"
- Checkbox **Winter** aktivieren
- **Jahr**:
- Oktober–Dezember → aktuelles Jahr
- Januar–September → aktuelles Jahr - 1
=== "Sommersemester"
- Checkbox **Sommer** aktivieren
- **Jahr**: aktuelles Jahr (März–August)
### Fehlermeldungen
Beim Versuch, den Apparat zu speichern, bevor alle Felder korrekt ausgefüllt sind, erscheint eine Fehlermeldung:
![Fehler](../images/main_apparatdetails_error.png){ loading=lazy }
!!! tip "Tipp"
Über **Ok** oder **Cancel** wird die Meldung geschlossen und der Apparat kann weiter bearbeitet werden.
---
## Dokumente hinzufügen
Dem Apparat können Dokumente hinzugefügt werden. Besonders hilfreich ist das Hinzufügen der **Antragsformulare**, da der SemesterapparatsManager diese automatisch auslesen kann.
```mermaid
graph LR
A[Dokument hinzufügen] --> B{Nächste Aktion}
B -->|"Daten [...] übernehmen"| C[Medien aus Dokument hinzufügen]
B -->|Nein| D[Als Referenz speichern]
C --> E[Medien werden automatisch erkannt]
```
1. Klicken Sie auf **Dokumente hinzufügen**
2. Wählen Sie die gewünschte(n) Datei(en) aus
3. Bei Antragsformularen: Klicken Sie auf **Medien aus Dokument hinzufügen**
!!! warning "Wichtig: Automatisches Speichern"
Beim Klicken auf **Medien aus Dokument hinzufügen** wird der Apparat automatisch gespeichert. Dies ist erforderlich, da Medien nur einem existierenden Apparat hinzugefügt werden können.
---
## Medienliste
![Optionen der Medienliste](../images/medialist_options.png){ loading=lazy }
In der Medienliste werden alle dem Apparat zugeordneten Medien angezeigt.
### Optionen
| Option | Beschreibung |
|--------|--------------|
| :lucide-eye: **gel. Medien anzeigen** | Zeigt auch gelöschte Medien an |
| :lucide-check-circle: **im Apparat?** | Prüft, ob ausgewählte Medien dem Apparat hinzugefügt wurden |
| :lucide-plus: **Medien hinzufügen** | Manuelle Eingabe von Signaturen |
### Kontextmenü
Mit einem **Rechtsklick** auf ein Medium öffnet sich das Kontextmenü:
![Kontextmenü](../images/media_context_menu_root.png){ loading=lazy }
=== "Allgemeines"
![Kontextmenü Allgemeines](../images/media_context_menu_general.png){ loading=lazy }
- **Bearbeiten**: Metadaten einsehen/bearbeiten
- **Löschen**: Medium aus dem Apparat entfernen (nicht physisch löschen)
=== "Apparate"
![Kontextmenü Apparate](../images/media_context_menu_apparats.png){ loading=lazy }
- **Zum Apparat hinzufügen**: *(derzeit deaktiviert)*
- **In Apparat verschieben**: Medium in anderen Apparat verschieben
- **In Apparat kopieren**: Medium in anderen Apparat kopieren
!!! tip "Mehrfachauswahl"
Mit ++ctrl+left-button++ können mehrere Medien ausgewählt werden.
---
## Medien manuell hinzufügen
![Medien hinzufügen](../images/add_media.png){ loading=lazy }
### Eingabe
Geben Sie jede Signatur in eine neue Zeile ein und bestätigen Sie mit ++enter++.
### Optionen
| Option | Beschreibung |
|--------|--------------|
| **Modus** | Wählt die Metadatenquelle |
| **Jedes Buch verwenden** | Erlaubt das Hinzufügen von Medien, die noch nicht im Apparat sind |
| **Exakte Signatur** | Nur diese spezifische Signatur verwenden (kein alternatives Exemplar) |
!!! info "Signatursuche"
Ohne **Exakte Signatur** wird nach der ähnlichsten Signatur gesucht – das gefundene Buch ist dasselbe, aber möglicherweise ein anderes Exemplar.
---
## Apparat speichern
Nach dem Ausfüllen aller Pflichtfelder:
1. Klicken Sie auf **Speichern**
2. Der Apparat wird in der Datenbank gespeichert
3. Optional: Bei aktiviertem **Mail senden** öffnet sich der Mail-Dialog
!!! note "E-Mail-Versand"
Der E-Mail-Versand erfordert konfigurierte [Mail-Zugangsdaten](../allgemein/hauptoberflaeche.md#e-mail).

View File

@@ -0,0 +1,47 @@
# Semesterapparat
In diesem Abschnitt finden Sie alle Informationen zur Verwaltung von Semesterapparaten.
<div class="grid cards" markdown>
- :lucide-book-plus:{ .lg .middle } **Anlegen**
---
Erstellen Sie neue Semesterapparate mit allen erforderlichen Informationen.
[:octicons-arrow-right-24: Apparat anlegen](anlegen.md)
- :lucide-calendar-plus:{ .lg .middle } **Verlängern**
---
Verlängern Sie bestehende Apparate für ein weiteres Semester.
[:octicons-arrow-right-24: Apparat verlängern](verlaengern.md)
- :lucide-trash-2:{ .lg .middle } **Löschen**
---
Entfernen Sie nicht mehr benötigte Semesterapparate.
[:octicons-arrow-right-24: Apparat löschen](loeschen.md)
- :lucide-bar-chart-2:{ .lg .middle } **Statistik**
---
Erhalten Sie Einblicke in die Nutzung der Semesterapparate.
[:octicons-arrow-right-24: Zur Statistik](statistik.md)
- :lucide-search:{ .lg .middle } **Suche**
---
Durchsuchen Sie bestehende Apparate und Medien.
[:octicons-arrow-right-24: Zur Suche](suche.md)
</div>

View File

@@ -0,0 +1,63 @@
# Semesterapparat löschen
Das Löschen von Semesterapparaten erfolgt über die [Suche & Statistik](suche.md) Seite.
---
## Löschvorgang
### Einzelnen Apparat löschen
1. Navigieren Sie zu **Suchen/Statistik**
2. Suchen Sie den gewünschten Apparat
3. Aktivieren Sie die Checkbox des Apparats
4. Klicken Sie auf **Ausgewählte Löschen**
### Mehrere Apparate löschen
1. Verwenden Sie die Suche mit dem Filter **Löschbar**
2. Wählen Sie alle zu löschenden Apparate aus
3. Klicken Sie auf **Ausgewählte Löschen**
!!! tip "Filter: Löschbar"
Der Filter **Löschbar** zeigt alle Apparate an, deren Endsemester abgelaufen ist und die zur Löschung vorgemerkt werden können.
---
## Bestätigung
Nach dem Klicken auf **Ausgewählte Löschen**:
![Löschbestätigung](../images/ss_stat_delete.png){ loading=lazy }
Die gelöschten Apparate werden in der Tabelle mit **rotem Hintergrund** angezeigt.
!!! failure "Gelöschte Apparate"
Gelöschte Apparate verbleiben in der Datenbank, werden aber als inaktiv markiert. Sie können bei Bedarf wiederhergestellt werden.
---
## Dozenten benachrichtigen
Bei der Löschung kann eine Benachrichtigung an den Dozenten versendet werden:
![Mail bei Löschung](../images/mail_delete.png){ loading=lazy }
1. Wählen Sie die zu löschenden Apparate aus
2. Klicken Sie auf **Ausgewählte Benachrichtigen**
3. Konfigurieren Sie die Anrede
4. Versenden Sie die Mail
Weitere Informationen: [Mails](../allgemein/hauptoberflaeche.md#e-mail)
---
## Wiederherstellen
Gelöschte Apparate können wiederhergestellt werden:
1. Suchen Sie den gelöschten Apparat (rot markiert)
2. Rechtsklick → **Wiederherstellen**
!!! success "Mehrfachwiederherstellung"
Mit ++ctrl+left-button++ können mehrere Apparate gleichzeitig ausgewählt und wiederhergestellt werden.

View File

@@ -0,0 +1,86 @@
# Statistik
Die Statistikfunktion bietet einen Überblick über alle Semesterapparate und deren Entwicklung über die Zeit.
![Statistikbereich](../images/ss_stats_table.png){ loading=lazy }
---
## Apparatsstatistik
Die Statistik zeigt alle Semester an, in denen Apparate erstellt oder gelöscht wurden.
### Tabellenansicht
| Spalte | Beschreibung |
|--------|--------------|
| **Semester** | Das betroffene Semester |
| **Erstellt** | Anzahl erstellter Apparate |
| **Gelöscht** | Anzahl gelöschter Apparate |
!!! tip "Detailansicht"
Mit einem **Doppelklick** auf ein Semester werden die einzelnen Apparate angezeigt.
### Detaillierte Ansicht
![Detailansicht](../images/ss_stats_detailed.png){ loading=lazy }
Die Detailansicht zeigt:
- Alle Personen, die in diesem Semester Apparate erstellt/gelöscht haben
- Pro Person: Liste aller erstellten oder gelöschten Apparate
??? info "Navigation"
- Klick auf :material-chevron-right: zeigt die Apparate einer Person
- Doppelklick auf einen Apparat wechselt zur [Hauptoberfläche](../allgemein/hauptoberflaeche.md)
!!! warning "Gelöschte Apparate"
Gelöschte Apparate können nicht angezeigt werden – die Doppelklick-Funktion ist dort deaktiviert.
---
## Diagramm
![Statistik-Diagramm](../images/ss_stats_graph.png){ loading=lazy }
Das Liniendiagramm visualisiert:
- :material-chart-line:{ style="color: green" } **Grün**: Erstellte Apparate
- :material-chart-line:{ style="color: red" } **Rot**: Gelöschte Apparate
!!! tip "Interaktivität"
Hovern Sie über Datenpunkte für genaue Werte.
---
## Auswertungen
### Typische Fragestellungen
<div class="grid cards" markdown>
- :lucide-trending-up: **Wachstum**
---
Wie viele Apparate wurden pro Semester erstellt?
- :lucide-users: **Nutzung**
---
Welche Dozenten nutzen den Service am meisten?
- :lucide-calendar: **Saisonalität**
---
Gibt es Unterschiede zwischen Sommer- und Wintersemester?
- :lucide-archive: **Bereinigung**
---
Wie viele Apparate werden regelmäßig gelöscht?
</div>

View File

@@ -0,0 +1,109 @@
# Suche
![Suche und Statistik](../images/main_search_stat.png){ loading=lazy }
Die Suchseite bietet zwei Hauptfunktionen: die **Apparatsuche** und die **Mediensuche**.
---
## Apparatsuche
### Suchfilter
![Statistiksuche](../images/ss_stat_search.png){ loading=lazy }
| Filter | Beschreibung |
|--------|--------------|
| **AppNr** | Nummer des Semesterapparates (Dropdown mit allen belegten Nummern) |
| **Person** | Name des Dozenten |
| **Fach** | Fachrichtung des Apparates |
| **Erstell-Semester** | Semester der Erstellung |
| **End-Semester** | Geplantes Ende des Apparates |
| **Dauerapp** | Nur Dauerapparate anzeigen |
| **Löschbar** | Alle löschbaren Apparate (überschreibt andere Filter) |
!!! tip "Alle anzeigen"
Starten Sie die Suche ohne Eingabe, um **alle** Semesterapparate anzuzeigen.
---
## Suchergebnisse
![Suchergebnisse](../images/ss_stat_result.png){ loading=lazy }
### Ergebnistabelle
Für jeden Treffer wird angezeigt:
- :material-checkbox-marked-outline: Checkbox zur Auswahl
- **Apparatsname**
- **Apparatsnummer**
- **Person**
- **Fach**
!!! failure "Gelöschte Apparate"
Gelöschte Apparate werden mit **rotem Hintergrund** angezeigt.
### Slider
!!! info "Layout anpassen"
Der vertikale Slider zwischen Tabelle und Graph kann verschoben werden, um mehr Platz für einen der Bereiche zu schaffen.
---
## Aktionen
### Ausgewählte Löschen
![Löschen](../images/ss_stat_delete.png){ loading=lazy }
1. Aktivieren Sie die Checkboxen der zu löschenden Apparate
2. Klicken Sie auf **Ausgewählte Löschen**
3. Gelöschte Apparate werden rot markiert
Weitere Informationen: [Apparat löschen](loeschen.md)
### Ausgewählte Benachrichtigen
![Mail bei Löschung](../images/mail_delete.png){ loading=lazy }
Versendet eine E-Mail an die Dozenten der ausgewählten Apparate.
1. Wählen Sie die Apparate aus
2. Klicken Sie auf **Ausgewählte Benachrichtigen**
3. Konfigurieren Sie die Anrede
4. Versenden Sie die Mail
---
## Kontextmenü
Rechtsklick auf einen Apparat öffnet das Kontextmenü:
| Option | Beschreibung |
|--------|--------------|
| :lucide-calendar-plus: **Verlängern** | Öffnet den [Verlängerungsdialog](verlaengern.md) |
| :lucide-undo: **Wiederherstellen** | Stellt gelöschte Apparate wieder her |
!!! tip "Mehrfachauswahl"
Mit ++ctrl+left-button++ auf die Zeilennummer können mehrere Apparate für die Wiederherstellung ausgewählt werden.
---
## Mediensuche
Der **Suchbereich** prüft, ob ein Exemplar in einem Apparat vorhanden ist.
### Suchkriterien
| Kriterium | Beschreibung |
|-----------|--------------|
| **Titel** | Titel des Exemplars (automatische Trunkierung) |
| **Signatur** | Signatur des Exemplars (automatische Trunkierung) |
!!! warning "Pflichtfelder"
Im Gegensatz zur Apparatsuche kann hier **keine** leere Suche durchgeführt werden, da zu viele Ergebnisse möglich wären.
### Ergebnisse
Die gefundenen Exemplare werden mit den zugehörigen Apparaten angezeigt.

View File

@@ -0,0 +1,44 @@
# Semesterapparat verlängern
Ein Dialog zum Verlängern eines Semesterapparates für ein weiteres Semester.
![Verlängerung](../images/extend.png){ loading=lazy }
---
## Verlängerung durchführen
### Schritte
1. Wählen Sie den zu verlängernden Apparat in der [Suche](suche.md) oder [Statistik](statistik.md) aus
2. Öffnen Sie den Verlängerungsdialog via Rechtsklick → **Verlängern**
3. Wählen Sie das Zielsemester aus
4. Klicken Sie auf **Speichern**
### Eingabefelder
| Feld | Beschreibung |
|------|--------------|
| **Semester** | Sommer- oder Wintersemester auswählen |
| **Jahr** | Das Jahr des neuen Semesters |
| **Dauerapparat** | :lucide-check: Markiert den Apparat als Dauerapparat |
---
## Dauerapparat
!!! info "Was ist ein Dauerapparat?"
Ein Dauerapparat wird nicht automatisch zur Löschung vorgemerkt und bleibt aktiv, bis er manuell gelöscht wird.
!!! warning "Hinweis"
Damit der Apparat als Dauerapparat verlängert werden kann, **muss** trotzdem ein Semester angegeben werden.
---
## Nach der Verlängerung
Nach dem Speichern:
- [x] Das Endsemester wird automatisch aktualisiert
- [x] Die Änderung erscheint in allen relevanten Tabellen
- [x] Optional: Benachrichtigung an den Dozenten versenden

51
docs/sonstiges/bilder.md Normal file
View File

@@ -0,0 +1,51 @@
# Bilder
Eine Sammlung von Screenshots und Referenzbildern aus dem SemesterapparatsManager.
---
## Admin-Bereich
### Admin-Aktionen
![Admin Aktionen](../images/actions.png){ loading=lazy }
Die Admin-Aktionen ermöglichen administrative Aufgaben wie Datenbankwartung und Systemkonfiguration.
---
## Semesterapparat
### Apparatscheckliste
![Checkliste](../images/checklist.png){ loading=lazy }
Die Checkliste zeigt den aktuellen Status eines Semesterapparates an.
### Medien hinzufügen
![Medien hinzufügen](../images/add_media.png){ loading=lazy }
Dialog zum manuellen Hinzufügen von Medien über ihre Signaturen.
---
## Eingabehilfen
### Kalender
![Kalender](../images/calendar.png){ loading=lazy }
Der Kalender ermöglicht die schnelle Auswahl von Datum und Semester.
---
## Weitere Screenshots
!!! tip "Screenshots in der Dokumentation"
Detaillierte Screenshots finden Sie in den jeweiligen Abschnitten:
- [Hauptoberfläche](../allgemein/hauptoberflaeche.md) – Übersicht der Benutzeroberfläche
- [Semesterapparat anlegen](../semesterapparat/anlegen.md) – Erstellung von Apparaten
- [ELSA anlegen](../elsa/anlegen.md) – Elektronische Semesterapparate
- [Zitieren](../elsa/zitieren.md) – Zitat-Dialog

15
docs/sonstiges/index.md Normal file
View File

@@ -0,0 +1,15 @@
# Sonstiges
Zusätzliche Informationen und Ressourcen zum SemesterapparatsManager.
<div class="grid cards" markdown>
- :lucide-image:{ .lg .middle } **Bilder**
---
Bildergalerie mit Screenshots und Referenzbildern.
[:octicons-arrow-right-24: Zur Galerie](bilder.md)
</div>

View File

@@ -1,30 +0,0 @@
site_name: SemesterapparatsManager
theme:
features:
- search.suggest
- search.highlight
name: material
icon:
admonition:
note: fontawesome/solid/note-sticky
abstract: fontawesome/solid/book
info: fontawesome/solid/circle-info
tip: fontawesome/solid/bullhorn
success: fontawesome/solid/check
question: fontawesome/solid/circle-question
warning: fontawesome/solid/triangle-exclamation
failure: fontawesome/solid/bomb
danger: fontawesome/solid/skull
bug: fontawesome/solid/robot
example: fontawesome/solid/flask
quote: fontawesome/solid/quote-left
markdown_extensions:
- admonition
- pymdownx.details
- pymdownx.superfences
- tables
extra_css:
- stylesheets/extra.css
plugins:
- search

View File

@@ -1,12 +1,13 @@
[project]
name = "semesterapparatsmanager"
version = "1.0.0"
version = "1.0.2"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"appdirs>=1.4.4",
"beautifulsoup4>=4.13.5",
"bibapi>=0.0.6",
"bump-my-version>=0.29.0",
"charset-normalizer>=3.4.3",
"comtypes>=1.4.9",
@@ -14,21 +15,22 @@ dependencies = [
"docx2pdf>=0.1.8",
"httpx>=0.28.1",
"loguru>=0.7.3",
"mkdocs>=1.6.1",
"mkdocs-material>=9.5.49",
"mkdocs-material-extensions>=1.3.1",
"natsort>=8.4.0",
"omegaconf>=2.3.0",
"openai>=1.79.0",
"pandas>=2.2.3",
"pdfquery>=0.4.3",
"playwright>=1.49.1",
"pyramid>=2.0.2",
"pymupdf>=1.26.6",
"flask>=3.1.0",
"pyside6>=6.9.1",
"python-docx>=1.1.2",
"pyzotero>=1.6.4",
"ratelimit>=2.2.1",
"regex>=2025.11.3",
"requests>=2.32.3",
"setuptools>=82.0.0",
"zensical>=0.0.10",
]
[dependency-groups]
@@ -36,37 +38,26 @@ dev = [
"bump-my-version>=0.29.0",
"icecream>=2.1.4",
"nuitka>=2.5.9",
"pytest",
"pytest-cov",
"pyinstaller>=6.17.0",
"ty>=0.0.15",
]
swbtest = [
"alive-progress>=3.3.0",
]
swbtest = ["alive-progress>=3.3.0"]
[tool.ruff]
line-length = 88
target-version = "py313"
[tool.bumpversion]
current_version = "1.0.0"
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
serialize = ["{major}.{minor}.{patch}"]
search = "{current_version}"
replace = "{new_version}"
regex = false
ignore_missing_version = false
ignore_missing_files = false
tag = true
sign_tags = false
tag_name = "v{new_version}"
tag_message = "Bump version: {current_version} → {new_version}"
allow_dirty = true
commit = true
message = "Bump version: {current_version} → {new_version}"
moveable_tags = []
commit_args = ""
setup_hooks = []
pre_commit_hooks = []
post_commit_hooks = []
[[tool.bumpversion.files]]
filename = "src/__init__.py"
[[tool.bumpversion.files]]
filename = ".version"
[[tool.uv.index]]
url = "https://git.theprivateserver.de/api/packages/WorldTeacher/pypi/simple/"
default = false
name = "gitea"
url = "https://git.theprivateserver.de/api/packages/PHB/pypi/simple/"
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "--cov=src --cov-report=term-missing"
[tool.coverage.run]
omit = ["main.py", "test.py", "tests/*", "__init__.py", ]

View File

@@ -1,12 +0,0 @@
[pytest]
# command should be *including --cov to generate coverage report
addopts = --cov
testpaths = tests
python_files = test_*.py
; Configuring pytest
; More info: https://docs.pytest.org/en/6.2.x/customize.html
;Logging
; DATE FORMAT EXAMPLE: %Y-%m-%d %H:%M:%S
; log_cli_format = %(asctime)s %(levelname)-8s %(name)-8s %(message)s
; log_cli_date_format = %H:%M:%S

View File

@@ -1,35 +1,67 @@
__version__ = "1.0.0"
__author__ = "Alexander Kirchner"
__all__ = ["__version__", "__author__", "Icon", "settings"]
__all__ = ["__author__", "__version__", "settings"]
import os
import sys
from pathlib import Path
from typing import Union
from appdirs import AppDirs
from config import Config
def get_app_base_path() -> Path:
    """Return the application's base directory.

    For a frozen (PyInstaller/Nuitka) executable this is the folder that
    contains the binary; for a normal source checkout it is the project
    root two levels above this module.
    """
    is_frozen = getattr(sys, "frozen", False)
    if is_frozen:
        # Running as a compiled/frozen bundle.
        return Path(sys.executable).parent
    return Path(__file__).parent.parent
app = AppDirs("SemesterApparatsManager", "SAM")
LOG_DIR: str = app.user_log_dir # type: ignore
CONFIG_DIR: str = app.user_config_dir # type: ignore
if not os.path.exists(LOG_DIR): # type: ignore
os.makedirs(LOG_DIR) # type: ignore
if not os.path.exists(CONFIG_DIR): # type: ignore
os.makedirs(CONFIG_DIR) # type: ignore
# Initialize LOG_DIR and CONFIG_DIR with fallbacks for frozen apps
try:
from appdirs import AppDirs
app = AppDirs("SemesterApparatsManager", "SAM")
_user_log_dir = app.user_log_dir
_user_config_dir = app.user_config_dir
except Exception:
_user_log_dir = None
_user_config_dir = None
# Ensure we always have valid paths
if not _user_log_dir:
_user_log_dir = str(get_app_base_path() / "logs")
if not _user_config_dir:
_user_config_dir = str(get_app_base_path() / "config")
from config import Config # noqa: E402
LOG_DIR: str = _user_log_dir
CONFIG_DIR: str = _user_config_dir
# Create directories if they don't exist
try:
if not Path(LOG_DIR).exists():
os.makedirs(LOG_DIR)
if not Path(CONFIG_DIR).exists():
os.makedirs(CONFIG_DIR)
except Exception:
# Fallback to current directory if we can't create the directories
LOG_DIR = str(get_app_base_path() / "logs")
CONFIG_DIR = str(get_app_base_path() / "config")
Path(LOG_DIR).mkdir(parents=True, exist_ok=True)
Path(CONFIG_DIR).mkdir(parents=True, exist_ok=True)
settings = Config(f"{CONFIG_DIR}/config.yaml")
DATABASE_DIR: Union[Path, str] = ( # type: ignore
app.user_config_dir if settings.database.path is None else settings.database.path # type: ignore
)
if not os.path.exists(DATABASE_DIR): # type: ignore
if not Path(DATABASE_DIR).exists(): # type: ignore
os.makedirs(DATABASE_DIR) # type: ignore
first_launch = settings.exists
if not os.path.exists(settings.database.temp.expanduser()): # type: ignore
if not Path(settings.database.temp.expanduser()).exists(): # type: ignore
settings.database.temp.expanduser().mkdir(parents=True, exist_ok=True) # type: ignore
from .utils.icon import Icon
if not os.path.exists("logs"):
os.mkdir("logs")
if not Path("logs").exists():
Path("logs").mkdir(exist_ok=True)
# open and close the file to create it

View File

@@ -1,22 +0,0 @@
__all__ = [
"AdminCommands",
"AutoAdder",
"AvailChecker",
"BookGrabber",
"Database",
"DocumentationThread",
"NewEditionCheckerThread",
"recreateElsaFile",
"recreateFile",
"Catalogue",
]
from .admin_console import AdminCommands
from .catalogue import Catalogue
from .create_file import recreateElsaFile, recreateFile
from .database import Database
from .documentation_thread import DocumentationThread
from .thread_bookgrabber import BookGrabber
from .thread_neweditions import NewEditionCheckerThread
from .threads_autoadder import AutoAdder
from .threads_availchecker import AvailChecker

View File

@@ -1,110 +0,0 @@
import hashlib
import random
import secrets
import sys

import loguru

from src import LOG_DIR

from .database import Database
# Module-level logger: console output at INFO plus a rotating file in LOG_DIR.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# change passwords for apparats, change passwords for users, list users, create and delete users etc
# create a class that has all commands. for each command, create a function that does the thing
class AdminCommands:
    """Admin commands for the admin console.

    Provides user management (create, delete, list) and password handling
    (hashing, salt generation, password changes) on top of the ``Database``
    layer.

    NOTE(review): the stored credential is ``salt + sha256(password)`` --
    the salt is prepended to the stored value but never mixed into the
    hash input, so it does not protect against precomputed-hash attacks.
    Changing the scheme would invalidate existing rows; confirm against
    the login/verification code before altering it.
    """

    def __init__(self, db_path=None):
        """Open a database connection for the admin commands.

        Args:
            db_path: optional path to the database file; ``None`` uses the
                default ``Database`` location.
        """
        self.db = Database() if db_path is None else Database(db_path=db_path)
        log.info("AdminCommands initialized with database connection.")
        log.debug("location: {}", self.db.db_path)

    def create_password(self, password: str) -> tuple[str, str]:
        """Create a hashed password and a salt for the password.

        Args:
            password (str): the base password to be hashed.

        Returns:
            tuple[str, str]: the hashed password and the salt stored
            alongside it. Note the salt is not an input to the hash
            itself (see class docstring).
        """
        salt = self.create_salt()
        hashed_password = self.hash_password(password)
        return (hashed_password, salt)

    def create_salt(self) -> str:
        """Generate a random 16 character salt for the password.

        Uses the ``secrets`` module instead of ``random`` because salts
        are security-sensitive material and need a CSPRNG.

        Returns:
            str: the randomized salt.
        """
        alphabet = "abcdefghijklmnopqrstuvwxyzQWERTZUIOPLKJHGFDSAYXCVBNM0123456789"
        return "".join(secrets.choice(alphabet) for _ in range(16))

    def create_admin(self):
        """Create the admin in the database. This is only used once, when the database is created."""
        salt = self.create_salt()
        hashed_password = self.hash_password("admin")
        self.db.createUser("admin", salt + hashed_password, "admin", salt)

    def create_user(self, username: str, password: str, role: str = "user") -> bool:
        """Create a new user in the database.

        Args:
            username (str): the username of the user to be created.
            password (str): the password of the user to be created.
            role (str, optional): the role of the user to be created. Defaults to "user".

        Returns:
            bool: creation status reported by the database layer.
        """
        hashed_password, salt = self.create_password(password)
        return self.db.createUser(
            user=username, password=salt + hashed_password, role=role, salt=salt
        )

    def hash_password(self, password: str) -> str:
        """Hash a password using SHA256.

        Args:
            password (str): the password to be hashed.

        Returns:
            str: the hex-encoded SHA-256 digest.
        """
        return hashlib.sha256(password.encode("utf-8")).hexdigest()

    def list_users(self) -> list[tuple]:
        """List all available users in the database.

        Returns:
            list[tuple]: a list of all users, containing all stored data for each user in a tuple.
        """
        return self.db.getUsers()

    def delete_user(self, username: str):
        """Delete a selected user from the database.

        Args:
            username (str): the username of the user to be deleted.
        """
        self.db.deleteUser(username)

    def change_password(self, username, password):
        """Change the password for a user.

        Args:
            username (str): username of the user to change the password for.
            password (str): the new, non-hashed password to change to.

        NOTE(review): unlike create_user/create_admin this stores the bare
        hash without a salt prefix; confirm Database.changePassword applies
        the same storage scheme, otherwise verification may fail after a
        password change.
        """
        hashed_password = self.hash_password(password)
        self.db.changePassword(username, hashed_password)

View File

@@ -4,7 +4,7 @@ import regex
import requests
from bs4 import BeautifulSoup
from src.logic import BookData as Book
from src.core.models import BookData as Book
from src.shared.logging import log
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
@@ -48,7 +48,8 @@ class Catalogue:
log.info(f"Searching for term: {searchterm}")
links = self.get_book_links(searchterm)
print(links)
# debug: links
# print(links)
for elink in links:
result = self.search(elink)
# in result search for class col-xs-12 rds-dl RDS_LOCATION
@@ -60,12 +61,14 @@ class Catalogue:
title = title_el.get_text(strip=True) if title_el else None
ppn_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN",
)
# in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
ppn = (
ppn_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).get_text(strip=True)
if ppn_el
else None
@@ -73,18 +76,21 @@ class Catalogue:
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
edition_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION",
)
edition = (
edition_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).get_text(strip=True)
if edition_el
else None
)
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
)
author = None
if authors:
@@ -92,7 +98,8 @@ class Catalogue:
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
)
if panel:
links = panel.find_all("a")
@@ -109,7 +116,7 @@ class Catalogue:
groups = []
cur = {}
for node in panel.select(
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
):
classes = node.get("class", [])
# Separator between entries
@@ -151,30 +158,30 @@ class Catalogue:
author=author,
edition=edition,
)
else:
return Book(
title=title,
ppn=ppn,
signature=signature,
library_location=loc.split("\n\n")[-1],
link=elink,
author=author,
edition=edition,
)
return Book(
title=title,
ppn=ppn,
signature=signature,
library_location=loc.split("\n\n")[-1],
link=elink,
author=author,
edition=edition,
)
def get(self, ppn: str) -> Book | None:
# based on PPN, get title, people, edition, year, language, pages, isbn,
link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
BeautifulSoup(result, "html.parser")
def get_ppn(self, searchterm: str) -> str | None:
links = self.get_book_links(searchterm)
ppn = None
for link in links:
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
print(link)
BeautifulSoup(result, "html.parser")
# debug: link
# print(link)
ppn = link.split("/")[-1]
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
return ppn
@@ -208,14 +215,16 @@ class Catalogue:
soup = BeautifulSoup(result, "html.parser")
# get all authors, return them as a string seperated by ;
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
)
if authors:
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
)
if panel:
links = panel.find_all("a")
@@ -236,7 +245,7 @@ class Catalogue:
groups = []
cur = {}
for node in panel.select(
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
):
classes = node.get("class", [])
# Separator between entries
@@ -266,15 +275,15 @@ class Catalogue:
# Find the signature for the entry whose location mentions "Semesterapparat"
for g in groups:
print(g)
# debug: group contents
# print(g)
loc = g.get("location", "").lower()
if "semesterapparat" in loc:
signature = g.get("signature")
return signature
else:
signature = g.get("signature")
return signature
print("No signature found")
signature = g.get("signature")
return signature
# print("No signature found")
return signature
def in_library(self, ppn: str) -> bool:

View File

@@ -1,72 +0,0 @@
import os
from pathlib import Path
from src.backend.database import Database
import loguru
import sys
from src import LOG_DIR
# Module-level logger: console output at INFO plus a rotating file in LOG_DIR.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# Shared database handle used by both recreate helpers below.
db = Database()
def recreateFile(name: str, app_id: int, filetype: str, open: bool = True) -> Path:
    """
    recreateFile creates a file from the database and opens it in the respective program, if the open parameter is set to True.

    Args:
    ----
    - name (str): The filename selected by the user.
    - app_id (int): the id of the apparatus.
    - filetype (str): the extension of the file to be created.
    - open (bool, optional): Determines if the file should be opened. Defaults to True.

    Returns:
    -------
    - Path: Absolute path to the file (resolved only when ``open`` is True,
      matching the previous behavior).
    """
    path = Path(db.recreateFile(name, app_id, filetype=filetype))
    log.info(f"File created: {path}")
    if open:
        path = path.resolve()
        # NOTE: the "OS" env var check mirrors the original platform probe.
        if os.getenv("OS") == "Windows_NT":
            os.startfile(path)
        else:
            # os.system(f"open {path}") broke for paths containing spaces or
            # shell metacharacters; pass an argument list (no shell) instead.
            import subprocess

            subprocess.run(["open", str(path)], check=False)
    return path
def recreateElsaFile(filename: str, filetype: str, open=True) -> Path:
    """
    recreateElsaFile creates a file from the database and opens it in the respective program, if the open parameter is set to True.

    Args:
    ----
    - filename (str): The filename selected by the user.
    - filetype (str): the extension of the file to be created.
    - open (bool, optional): Determines if the file should be opened. Defaults to True.

    Returns:
    -------
    - Path: Absolute path to the file (resolved only when ``open`` is True,
      matching the previous behavior).
    """
    # Validate the type first: the original checked isinstance() only after
    # calling str methods, so non-string input raised AttributeError instead
    # of the intended ValueError.
    if not isinstance(filename, str):
        raise ValueError("filename must be a string")
    # Strip a "('...')" tuple-repr wrapper that some callers pass through.
    if filename.startswith("(") and filename.endswith(")"):
        filename = filename[1:-1].replace("'", "")
    path = Path(db.recreateElsaFile(filename, filetype))
    if open:
        path = path.resolve()
        if os.getenv("OS") == "Windows_NT":
            os.startfile(path)
        else:
            # Quote-safe replacement for os.system(f"open {path}"): argument
            # list, no shell, so spaces/metacharacters in the path are safe.
            import subprocess

            subprocess.run(["open", str(path)], check=False)
    return path

File diff suppressed because it is too large Load Diff

View File

@@ -1,112 +0,0 @@
CREATE_TABLE_APPARAT = """CREATE TABLE semesterapparat (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TEXT,
prof_id INTEGER,
fach TEXT,
appnr INTEGER,
erstellsemester TEXT,
verlängert_am TEXT,
dauer BOOLEAN,
verlängerung_bis TEXT,
deletion_status INTEGER,
deleted_date TEXT,
apparat_id_adis INTEGER,
prof_id_adis INTEGER,
konto INTEGER,
FOREIGN KEY (prof_id) REFERENCES prof (id)
)"""
CREATE_TABLE_MEDIA = """CREATE TABLE media (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
bookdata TEXT,
app_id INTEGER,
prof_id INTEGER,
deleted INTEGER DEFAULT (0),
available BOOLEAN,
reservation BOOLEAN,
FOREIGN KEY (prof_id) REFERENCES prof (id),
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""
CREATE_TABLE_FILES = """CREATE TABLE files (
id INTEGER PRIMARY KEY,
filename TEXT,
fileblob BLOB,
app_id INTEGER,
filetyp TEXT,
prof_id INTEGER REFERENCES prof (id),
FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""
CREATE_TABLE_MESSAGES = """CREATE TABLE messages (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
message TEXT NOT NULL,
remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
user_id INTEGER NOT NULL,
appnr INTEGER,
FOREIGN KEY (user_id) REFERENCES user (id)
)"""
CREATE_TABLE_PROF = """CREATE TABLE prof (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
titel TEXT,
fname TEXT,
lname TEXT,
fullname TEXT NOT NULL UNIQUE,
mail TEXT,
telnr TEXT
)"""
CREATE_TABLE_USER = """CREATE TABLE user (
id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
username TEXT NOT NULL UNIQUE,
password TEXT NOT NULL,
salt TEXT NOT NULL,
role TEXT NOT NULL,
email TEXT UNIQUE,
name TEXT
)"""
CREATE_TABLE_SUBJECTS = """CREATE TABLE subjects (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TEXT NOT NULL UNIQUE
)"""
CREATE_ELSA_TABLE = """CREATE TABLE elsa (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
date TEXT NOT NULL,
semester TEXT NOT NULL,
prof_id INTEGER NOT NULL
)"""
CREATE_ELSA_FILES_TABLE = """CREATE TABLE elsa_files (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
filename TEXT NOT NULL,
fileblob BLOB NOT NULL,
elsa_id INTEGER NOT NULL,
filetyp TEXT NOT NULL,
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
CREATE_ELSA_MEDIA_TABLE = """CREATE TABLE elsa_media (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
work_author TEXT,
section_author TEXT,
year TEXT,
edition TEXT,
work_title TEXT,
chapter_title TEXT,
location TEXT,
publisher TEXT,
signature TEXT,
issue TEXT,
pages TEXT,
isbn TEXT,
type TEXT,
elsa_id INTEGER NOT NULL,
FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
CREATE_TABLE_NEWEDITIONS = """CREATE TABLE neweditions (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
new_bookdata TEXT,
old_edition_id INTEGER,
for_apparat INTEGER,
ordered BOOLEAN DEFAULT (0),
FOREIGN KEY (old_edition_id) REFERENCES media (id),
FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
)"""

View File

@@ -1,20 +0,0 @@
import os
from src import settings
# Database section of the application settings; provides the temp directory.
database = settings.database
def delete_temp_contents(path=None):
    """
    delete_temp_contents deletes the contents of the temp directory.

    Args:
    ----
    - path (optional): directory whose contents are removed. Defaults to the
      configured ``database.temp`` directory (expanded).
    """
    if path is None:
        path = database.temp.expanduser()
    # Walk bottom-up so that child files/dirs are removed before their parent
    # directory; the previous top-down walk called os.rmdir on directories
    # that still contained subdirectories, which raises OSError.
    for root, dirs, files in os.walk(path, topdown=False):
        for file in files:
            os.remove(os.path.join(root, file))
        for dir in dirs:
            os.rmdir(os.path.join(root, dir))


if __name__ == "__main__":
    delete_temp_contents()

View File

@@ -1,23 +0,0 @@
from PySide6.QtCore import QThread, Slot
from src.utils.documentation import website, QuietHandler
from wsgiref.simple_server import make_server
class DocumentationThread(QThread):
    """Qt thread that serves the bundled documentation website.

    Runs a WSGI server on localhost:8000 in a background thread so the UI
    stays responsive while the docs are viewable in a browser.
    """

    def __init__(self):
        super().__init__()
        self._server = None  # store server so we can shut it down

    def run(self):
        """Thread body: serve documentation requests until interrupted."""
        # launch_documentation()
        self._server = make_server(
            "localhost", 8000, website(), handler_class=QuietHandler
        )
        # Handle one request per iteration so the interruption flag is
        # re-checked between requests instead of blocking in serve_forever().
        while not self.isInterruptionRequested():
            self._server.handle_request()

    @Slot()  # slot you can connect to aboutToQuit
    def stop(self):
        """Ask the serve loop to exit and shut the server down."""
        self.requestInterruption()  # ask the loop above to exit
        if self._server:
            # NOTE(review): BaseServer.shutdown() is specified to stop a
            # serve_forever() loop; whether it reliably unblocks a pending
            # handle_request() call should be confirmed.
            self._server.shutdown()  # unblock handle_request()

View File

@@ -1,68 +0,0 @@
import os
import sqlite3 as sql
from pathlib import Path
from typing import List
from src import DATABASE_DIR, settings
from src.shared.logging import log
# Directory holding the .sql migration scripts, located next to this module.
MIGRATIONS_DIR = Path(__file__).parent / "migrations"
def _ensure_migrations_table(conn: sql.Connection) -> None:
cursor = conn.cursor()
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS schema_migrations (
id TEXT PRIMARY KEY,
applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
)
"""
)
conn.commit()
def _applied_migrations(conn: sql.Connection) -> List[str]:
cursor = conn.cursor()
cursor.execute("SELECT id FROM schema_migrations ORDER BY id")
rows = cursor.fetchall()
return [r[0] for r in rows]
def _apply_sql_file(conn: sql.Connection, path: Path) -> None:
    """Execute the migration script at *path* and record it as applied."""
    log.info(f"Applying migration {path.name}")
    script = path.read_text(encoding="utf-8")
    cur = conn.cursor()
    cur.executescript(script)
    # Record the migration id so it is skipped on the next run.
    cur.execute(
        "INSERT OR REPLACE INTO schema_migrations (id) VALUES (?)",
        (path.name,),
    )
    conn.commit()
def run_migrations(db_path: Path) -> None:
    """Run all unapplied migrations from the migrations directory against the database at db_path."""
    if not MIGRATIONS_DIR.exists():
        log.debug("Migrations directory does not exist, skipping migrations")
        return
    # Ensure database directory exists
    # NOTE(review): the directory is taken from settings/DATABASE_DIR rather
    # than db_path.parent -- assumes db_path lives inside that directory;
    # confirm against callers.
    db_dir = settings.database.path or Path(DATABASE_DIR)
    if not db_dir.exists():
        os.makedirs(db_dir, exist_ok=True)
    conn = sql.connect(db_path)
    try:
        _ensure_migrations_table(conn)
        applied = set(_applied_migrations(conn))
        # Lexicographic order of the .sql file names defines migration order.
        migration_files = sorted(
            [p for p in MIGRATIONS_DIR.iterdir() if p.suffix in (".sql",)]
        )
        for m in migration_files:
            if m.name in applied:
                log.debug(f"Skipping already applied migration {m.name}")
                continue
            _apply_sql_file(conn, m)
    finally:
        # Always release the connection, even if a script fails mid-way.
        conn.close()

View File

@@ -1,132 +0,0 @@
-- Initial schema for the Semesterapparat (course reserve) application.
BEGIN TRANSACTION;
-- One course-reserve collection ("Semesterapparat") owned by a professor.
CREATE TABLE IF NOT EXISTS semesterapparat (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT,
    prof_id INTEGER,
    fach TEXT,
    appnr INTEGER,
    erstellsemester TEXT,
    verlängert_am TEXT,
    dauer BOOLEAN,
    verlängerung_bis TEXT,
    deletion_status INTEGER,
    deleted_date TEXT,
    apparat_id_adis INTEGER,
    prof_id_adis INTEGER,
    konto INTEGER,
    FOREIGN KEY (prof_id) REFERENCES prof (id)
);
-- Books/media assigned to an apparat; bookdata holds serialized book info.
CREATE TABLE IF NOT EXISTS media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    bookdata TEXT,
    app_id INTEGER,
    prof_id INTEGER,
    deleted INTEGER DEFAULT (0),
    available BOOLEAN,
    reservation BOOLEAN,
    FOREIGN KEY (prof_id) REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);
-- File attachments (blobs) linked to an apparat and professor.
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    filename TEXT,
    fileblob BLOB,
    app_id INTEGER,
    filetyp TEXT,
    prof_id INTEGER REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);
-- Per-user reminder messages, optionally tied to an apparat number.
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    message TEXT NOT NULL,
    remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER NOT NULL,
    appnr INTEGER,
    FOREIGN KEY (user_id) REFERENCES user (id)
);
-- Professors owning apparats; fullname is the unique natural key.
CREATE TABLE IF NOT EXISTS prof (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    titel TEXT,
    fname TEXT,
    lname TEXT,
    fullname TEXT NOT NULL UNIQUE,
    mail TEXT,
    telnr TEXT
);
-- Application user accounts; password is stored with a per-user salt.
CREATE TABLE IF NOT EXISTS user (
    id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
    created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
    username TEXT NOT NULL UNIQUE,
    password TEXT NOT NULL,
    salt TEXT NOT NULL,
    role TEXT NOT NULL,
    email TEXT UNIQUE,
    name TEXT
);
-- Lookup table of subject names.
CREATE TABLE IF NOT EXISTS subjects (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE
);
-- ELSA submissions (electronic course material requests) per professor.
CREATE TABLE IF NOT EXISTS elsa (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    date TEXT NOT NULL,
    semester TEXT NOT NULL,
    prof_id INTEGER NOT NULL
);
-- File attachments belonging to an ELSA submission.
CREATE TABLE IF NOT EXISTS elsa_files (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    filename TEXT NOT NULL,
    fileblob BLOB NOT NULL,
    elsa_id INTEGER NOT NULL,
    filetyp TEXT NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);
-- Bibliographic details of the works referenced by an ELSA submission.
CREATE TABLE IF NOT EXISTS elsa_media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    work_author TEXT,
    section_author TEXT,
    year TEXT,
    edition TEXT,
    work_title TEXT,
    chapter_title TEXT,
    location TEXT,
    publisher TEXT,
    signature TEXT,
    issue TEXT,
    pages TEXT,
    isbn TEXT,
    type TEXT,
    elsa_id INTEGER NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);
-- Newer editions discovered for media already in an apparat.
CREATE TABLE IF NOT EXISTS neweditions (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    new_bookdata TEXT,
    old_edition_id INTEGER,
    for_apparat INTEGER,
    ordered BOOLEAN DEFAULT (0),
    FOREIGN KEY (old_edition_id) REFERENCES media (id),
    FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
);
-- Helpful indices to speed up frequent lookups and joins
CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id);
CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted);
CREATE INDEX IF NOT EXISTS idx_media_available ON media(available);
CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr);
COMMIT;

View File

@@ -1,10 +0,0 @@
-- Migration: create the webadis_login table holding WebADIS credentials.
-- NOTE(review): password is stored as plain TEXT here -- confirm whether
-- encryption/obfuscation happens at a higher layer.
BEGIN TRANSACTION;
CREATE TABLE IF NOT EXISTS webadis_login (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    username TEXT NOT NULL,
    password TEXT NOT NULL
);
COMMIT;

View File

@@ -1,6 +0,0 @@
-- Migration: add effective_range to webadis_login.
-- Presumably the validity period of the stored credentials -- verify
-- against the code that reads this column.
BEGIN TRANSACTION;
ALTER TABLE webadis_login
ADD COLUMN effective_range TEXT;
COMMIT;

View File

@@ -1,10 +0,0 @@
import pickle
from typing import Any, ByteString
def make_pickle(data: Any) -> bytes:
    """Serialize *data* to a pickle byte string."""
    return pickle.dumps(data)
def load_pickle(data: ByteString) -> Any:
    """Deserialize a pickle byte string back into a Python object.

    SECURITY: pickle.loads can execute arbitrary code during
    deserialization -- never call this on untrusted input.
    """
    return pickle.loads(data)

View File

@@ -1,26 +0,0 @@
from dataclasses import dataclass, field
import yaml
@dataclass
class Settings:
    """Settings for the app, persisted as a YAML file.

    Attributes mirror the keys of the config file.
    """

    save_path: str
    database_name: str
    database_path: str
    bib_id: str
    default_apps: bool = True
    custom_applications: list[dict] = field(default_factory=list)

    def save_settings(self, path: str = "config.yaml") -> None:
        """Save the settings to the config file.

        Args:
            path: Target file; defaults to the historical "config.yaml".
        """
        # Explicit utf-8 keeps the file portable across platforms.
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(self.__dict__, f)

    def load_settings(self, path: str = "config.yaml") -> dict:
        """Load and return the raw settings mapping from the config file.

        Note: this returns the parsed dict only; it does NOT update the
        instance's attributes.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return data

View File

@@ -1,199 +0,0 @@
from PySide6.QtCore import QThread, Signal
from src.backend import Database
from src.logic.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
# Logger configured centrally in main; this module just uses `log`
class BookGrabber(QThread):
    """Worker thread that fetches catalogue data for a list of signatures
    and stores the resulting books in the database.

    Signals:
        updateSignal(int, int): (processed items, total items).
        done: declared for completion notification (not emitted in run()).
    """

    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self):
        super(BookGrabber, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = []
        self.app_id = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = None
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
        self.request = WebRequest()
        self.db = Database()

    def add_values(
        self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        """Configure the worker before start().

        Args:
            app_id: Primary key of the semesterapparat row.
            prof_id: Primary key of the professor row.
            mode: Transformer mode (e.g. "ARRAY").
            data: List of signatures/identifiers to process.
            any_book: Accept any matching book from the web response.
            exact: Restrict the transformer to the exact signature.
        """
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data: list[str] = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))
        app_nr = self.db.query_db(
            "SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,)
        )[0][0]
        # FIX: store the apparat number on the instance. run() compares
        # str(self.app_nr) against RDS locations, but previously the value
        # only lived in this local, so the comparison always used "None"
        # and availability could never be detected.
        self.app_nr = app_nr
        self.request.set_apparat(app_nr)

    def run(self):
        """Process every entry: fetch web data, transform it, persist the
        book and set its availability from RDS location matches."""
        item = 0
        iterdata = self.data
        for entry in iterdata:
            log.info("Processing entry: {}", entry)
            webdata = self.request.get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()
            if webdata == "error":
                # upstream signals failures with the literal string "error"
                continue
            bd = BibTextTransformer(self.mode)
            log.debug(webdata)
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            log.debug(bd)
            if bd is None:
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )
            self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
            # get latest book id
            self.book_id = self.db.getLastBookId()
            log.info("Added book to database")
            state = 0
            for result in transformer.RDS_DATA:
                # available iff one holding lists this apparat's number
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break
            log.info(f"State of {entry}: {state}")
            log.debug(
                "updating availability of " + str(self.book_id) + " to " + str(state)
            )
            try:
                self.db.setAvailability(self.book_id, state)
                log.debug("Added book to database")
            except Exception as e:
                log.error(f"Failed to update availability: {e}")
                log.debug("Failed to update availability: " + str(e))
            item += 1
            self.updateSignal.emit(item, len(self.data))
        log.info("Worker thread finished")
        self.quit()

    def stop(self):
        """Flag the worker as stopped.

        NOTE(review): run() does not poll this flag -- confirm whether
        callers rely on it or whether it can be honored in the loop.
        """
        self.is_Running = False
class BookGrabberTest(QThread):
    """Dry-run variant of BookGrabber: performs the same web lookups and
    transformations but collects results in memory instead of writing to
    the database.

    Signals:
        updateSignal(int, int): (processed items, total items).
        done: declared for completion notification (not emitted in run()).
    """

    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self, appnr: int):
        # appnr: number of the semesterapparat to check holdings against.
        super(BookGrabberTest, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = None
        self.app_nr = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = appnr
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        # results: transformed BookData objects collected during run()
        self.results = []

    def add_values(
        self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        """Configure the worker before start().

        Overrides the appnr given to __init__ with *app_nr*.
        """
        self.app_nr = app_nr
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))
        # log.debug(self.tstate)

    def run(self):
        """Process every entry and append the transformed BookData to
        self.results (no database writes)."""
        item = 0
        iterdata = self.data
        # log.debug(iterdata)
        for entry in iterdata:
            # log.debug(entry)
            signature = str(entry)
            log.info("Processing entry: " + signature)
            webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()
            if webdata == "error":
                # upstream signals failures with the literal string "error"
                continue
            bd = BibTextTransformer(self.mode)
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            if bd is None:
                # bd = BookData
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )
            # confirm lock is acquired
            # get latest book id
            log.info("Added book to database")
            state = 0
            for result in transformer.RDS_DATA:
                # log.debug(result.RDS_LOCATION)
                # available iff one holding lists this apparat's number
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break
            log.info(f"State of {signature}: {state}")
            # log.debug("updating availability of " + str(self.book_id) + " to " + str(state))
            self.results.append(bd)
            # time.sleep(5)
            item += 1
            self.updateSignal.emit(item, len(self.data))
        log.info("Worker thread finished")
        # self.done.emit()
        self.quit()

    def stop(self):
        # NOTE(review): run() does not poll this flag; confirm callers.
        self.is_Running = False

View File

@@ -1,345 +0,0 @@
import os
import re
from concurrent.futures import ThreadPoolExecutor
from math import ceil
from queue import Empty, Queue
from time import monotonic # <-- NEW
from typing import List, Optional
from PySide6.QtCore import QThread, Signal
# from src.logic.webrequest import BibTextTransformer, WebRequest
from src.backend.catalogue import Catalogue
from src.logic import BookData
from src.logic.SRU import SWB
from src.shared.logging import log
# use all available cores - 2, but at least 1
# FIX: os.cpu_count() may return None (count undeterminable); fall back
# to 1 so the arithmetic below cannot raise TypeError.
THREAD_COUNT = max((os.cpu_count() or 1) - 2, 1)
THREAD_MIN_ITEMS = 5
# Logger configured centrally in main; use shared `log`
# Module-level service clients shared by all worker threads.
# NOTE(review): `dnb` is also an SWB() instance despite its name -- confirm.
swb = SWB()
dnb = SWB()
cat = Catalogue()
RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative RVK character set
def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    New edition if:
    - year > swb.year OR
    - edition_number > swb.edition_number
    BUT: discard any candidate with year < swb.year (if both years are known).
    Same-work check:
    - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
    - If both have signatures and RVKs differ -> skip.
    Preferences (in order):
    1) RVK matches SWB
    2) Print over Online-Ressource
    3) Has signature
    4) Newer: (year desc, edition_number desc)

    Returns None when no candidate qualifies; otherwise the deduped
    candidates ordered best-first.
    """
    def strip_copy_and_edition(s: str) -> str:
        # Remove copy/edition decorations so RVK roots compare cleanly.
        s = re.sub(r"\(\s*\d+\s*\)", "", s)  # remove '(N)'
        s = re.sub(r"\s*\+\s*\d+\s*$", "", s)  # remove trailing '+N'
        return s
    def extract_rvk_root(sig: Optional[str]) -> str:
        # Best-effort extraction of the leading RVK classification token.
        if not sig:
            return ""
        t = strip_copy_and_edition(sig.upper())
        t = re.sub(r"\s+", " ", t).strip()
        m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t)
        if not m:
            # Fallback: strip everything outside the RVK alphabet and
            # take the first remaining word.
            cleaned = re.sub(rf"[^{RVK_ALLOWED} ]+", "", t).strip()
            return cleaned.split(" ")[0] if cleaned else ""
        return re.sub(r"\s+", " ", m.group(1)).strip()
    def has_sig(b: BookData) -> bool:
        return bool(getattr(b, "signature", None))
    def is_online(b: BookData) -> bool:
        return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource"
    def is_print(b: BookData) -> bool:
        return not is_online(b)
    def rvk_matches_swb(b: BookData) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return extract_rvk_root(b.signature) == extract_rvk_root(swb_result.signature)
    def strictly_newer(b: BookData) -> bool:
        # Hard guard: if both years are known and candidate is older, discard
        if (
            b.year is not None
            and swb_result.year is not None
            and b.year < swb_result.year
        ):
            return False
        newer_by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        newer_by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        # Thanks to the guard above, newer_by_edition can't pick something with a smaller year.
        return newer_by_year or newer_by_edition
    swb_has_sig = has_sig(swb_result)
    swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None))
    # 1) Filter: same work (by RVK if both have sigs) AND strictly newer
    candidates: List[BookData] = []
    for b in dnb_result:
        if has_sig(b) and swb_has_sig:
            if extract_rvk_root(b.signature) != swb_rvk:
                continue  # different work
        if strictly_newer(b):
            candidates.append(b)
    if not candidates:
        return None
    # 2) Dedupe by PPN → prefer (rvk-match, is-print, has-signature)
    def pref_score(x: BookData) -> tuple[int, int, int]:
        return (
            1 if rvk_matches_swb(x) else 0,
            1 if is_print(x) else 0,
            1 if has_sig(x) else 0,
        )
    # NOTE(review): candidates without a ppn all share the key None and
    # collapse into a single entry here -- confirm this is intended.
    by_ppn: dict[Optional[str], BookData] = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None or pref_score(b) > pref_score(prev):
            by_ppn[key] = b
    deduped = list(by_ppn.values())
    if not deduped:
        return None
    # 3) Preserve all qualifying newer editions, but order by preference
    def sort_key(b: BookData):
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        return (
            1 if rvk_matches_swb(b) else 0,
            1 if is_print(b) else 0,
            1 if has_sig(b) else 0,
            year,
            ed,
        )
    deduped.sort(key=sort_key, reverse=True)
    return deduped
class NewEditionCheckerThread(QThread):
    """Qt thread that checks a list of books for newer editions.

    Work is fanned out to a small ThreadPoolExecutor; workers report
    progress and results through a Queue which this thread aggregates,
    re-emitting Qt signals (progress, rate, ETA, final results).
    """

    updateSignal = Signal(int, int)  # (processed, total)
    updateProgress = Signal(int, int)  # (processed, total)
    total_entries_signal = Signal(int)
    resultsSignal = Signal(list)  # list[tuple[BookData, list[BookData]]]
    # NEW: metrics signals
    rateSignal = Signal(float)  # items per second ("it/s")
    etaSignal = Signal(int)  # seconds remaining (-1 when unknown)

    def __init__(self, entries: Optional[list["BookData"]] = None, parent=None):
        # entries: books to check; may be filled later and cleared via reset().
        super().__init__(parent)
        self.entries: list["BookData"] = entries if entries is not None else []
        self.results: list[tuple["BookData", list["BookData"]]] = []

    def reset(self):
        """Clear entries and accumulated results for a fresh run."""
        self.entries = []
        self.results = []

    # ---------- internal helpers ----------
    @staticmethod
    def _split_evenly(items: list, parts: int) -> list[list]:
        """Split items as evenly as possible into `parts` chunks (no empty tails)."""
        if parts <= 1 or len(items) <= 1:
            return [items]
        n = len(items)
        base = n // parts
        extra = n % parts
        chunks = []
        i = 0
        for k in range(parts):
            # First `extra` chunks get one additional item each.
            size = base + (1 if k < extra else 0)
            if size == 0:
                continue
            chunks.append(items[i : i + size])
            i += size
        return chunks

    @staticmethod
    def _clean_title(raw: str) -> str:
        # Strip trailing punctuation and any parenthesised suffix.
        title = raw.rstrip(" .:,;!?")
        title = re.sub(r"\s*\(.*\)", "", title)
        return title.strip()

    @classmethod
    def _process_book(
        cls, book: "BookData"
    ) -> tuple["BookData", list["BookData"]] | None:
        """Process one book; returns (original, [found editions]) or None on failure."""
        if not book.title:
            return None
        response: list["BookData"] = []
        query = [
            f"pica.tit={book.title}",
            f"pica.vlg={book.publisher}",
        ]
        # Fetch the library's own record first, then title/publisher matches.
        swb_result = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])[0]
        dnb_results = swb.getBooks(query)
        new_editions = find_newer_edition(swb_result, dnb_results)
        if new_editions is not None:
            for new_edition in new_editions:
                new_edition.library_location = cat.get_location(new_edition.ppn)
                try:
                    # isbn may be a list or a scalar; take the first entry.
                    isbn = (
                        str(new_edition.isbn[0])
                        if isinstance(new_edition.isbn, list)
                        else str(new_edition.isbn)
                    )
                    new_edition.link = (
                        f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}"
                    )
                except (IndexError, TypeError):
                    isbn = None
                new_edition.in_library = cat.in_library(new_edition.ppn)
            response = new_editions
        # client = SWB()
        # response: list["BookData"] = []
        # # First, search by title only
        # results = client.getBooks([f"pica.title={title}", f"pica.vlg={book.publisher}"])
        # lehmanns = LehmannsClient()
        # results = lehmanns.search_by_title(title)
        # for result in results:
        #     if "(eBook)" in result.title:
        #         result.title = result.title.replace("(eBook)", "").strip()
        #     swb_results = client.getBooks(
        #         [
        #             f"pica.tit={result.title}",
        #             f"pica.vlg={result.publisher.split(',')[0]}",
        #         ]
        #     )
        #     for swb in swb_results:
        #         if swb.isbn == result.isbn:
        #             result.ppn = swb.ppn
        #             result.signature = swb.signature
        #             response.append(result)
        #         if (result.edition_number < swb.edition_number) and (
        #             swb.year > result.year
        #         ):
        #             response.append(result)
        if response == []:
            return None
        # Remove duplicates based on ppn
        return (book, response)

    @classmethod
    def _worker(cls, items: list["BookData"], q: Queue) -> None:
        """Worker for one chunk; pushes ('result', ...), ('progress', 1), and ('done', None)."""
        try:
            for book in items:
                try:
                    result = cls._process_book(book)
                except Exception:
                    # Per-item failures are swallowed; the item just counts
                    # as progress without a result.
                    result = None
                if result is not None:
                    q.put(("result", result))
                q.put(("progress", 1))
        finally:
            # 'done' must always be sent or run()'s aggregation loop hangs.
            q.put(("done", None))

    # ---------- thread entry point ----------
    def run(self):
        """Fan out entries to workers and aggregate progress/results."""
        total = len(self.entries)
        self.total_entries_signal.emit(total)
        # start timer for metrics
        t0 = monotonic()
        if total == 0:
            log.debug("No entries to process.")
            # emit metrics (zero work)
            self.rateSignal.emit(0.0)
            self.etaSignal.emit(0)
            self.resultsSignal.emit([])
            return
        # Up to 4 workers; ~20 items per worker
        num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
        chunks = self._split_evenly(self.entries, num_workers)
        sizes = [len(ch) for ch in chunks]
        q: Queue = Queue()
        processed = 0
        finished_workers = 0
        with ThreadPoolExecutor(max_workers=len(chunks)) as ex:
            futures = [ex.submit(self._worker, ch, q) for ch in chunks]
            log.info(
                f"Launched {len(futures)} worker thread(s) for {total} entries: {sizes} entries per thread."
            )
            for idx, sz in enumerate(sizes, 1):
                log.debug(f"Thread {idx}: {sz} entries")
            # Aggregate progress/results
            while finished_workers < len(chunks):
                try:
                    kind, payload = q.get(timeout=0.1)
                except Empty:
                    continue
                if kind == "progress":
                    processed += int(payload)
                    self.updateSignal.emit(processed, total)
                    self.updateProgress.emit(processed, total)
                    # ---- NEW: compute & emit metrics ----
                    elapsed = max(1e-9, monotonic() - t0)
                    rate = processed / elapsed  # items per second
                    remaining = max(0, total - processed)
                    eta_sec = int(round(remaining / rate)) if rate > 0 else -1
                    self.rateSignal.emit(rate)
                    # clamp negative just in case
                    self.etaSignal.emit(max(0, eta_sec) if eta_sec >= 0 else -1)
                    # -------------------------------------
                elif kind == "result":
                    self.results.append(payload)
                elif kind == "done":
                    finished_workers += 1
        # Final metrics on completion
        elapsed_total = max(1e-9, monotonic() - t0)
        final_rate = total / elapsed_total
        self.rateSignal.emit(final_rate)
        self.etaSignal.emit(0)
        self.resultsSignal.emit(self.results)

View File

@@ -1,59 +0,0 @@
import sys
import time
import loguru
# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal
from src import LOG_DIR
from src.backend import Database
# Module-local loguru setup: console sink at INFO plus a rotating file sink.
# NOTE(review): this duplicates the centralized logging configuration used
# elsewhere in the project -- confirm whether it should be consolidated.
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# from src.transformers import RDS_AVAIL_DATA
class AutoAdder(QThread):
    """Worker thread that walks a list of entries, emitting progress
    signals for each one (one second apart).

    Signals:
        updateSignal(int): index of the entry about to be processed.
        setTextSignal(int): the entry itself.
        progress(int): number of entries processed so far.
    """

    updateSignal = Signal(int)
    setTextSignal = Signal(int)
    progress = Signal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        """Store the work description.

        Args:
            data: Iterable of entries (signatures) to process.
            app_id: Target semesterapparat id.
            prof_id: Target professor id.
            parent: Optional Qt parent object.
        """
        super().__init__(parent)
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

    def run(self):
        """Emit one signal triple per entry, pausing 1s between items."""
        self.db = Database()
        log.info("Starting worker thread")
        # FIX: guard against data=None (the default) -- iterating None
        # raised TypeError before. finished is emitted automatically.
        if not self.data:
            log.info("Worker thread finished")
            return
        item = 0
        for entry in self.data:
            try:
                self.updateSignal.emit(item)
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                time.sleep(1)
            except Exception as e:
                log.exception(
                    f"The query failed with message {e} for signature {entry}"
                )
                continue
        if item == len(self.data):
            log.info("Worker thread finished")
        # FIX: do not emit `finished` manually -- QThread emits it
        # automatically when run() returns; the explicit emit caused a
        # duplicate notification to every connected slot.

View File

@@ -1,83 +0,0 @@
# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal
from src.backend.database import Database
from src.backend.webadis import get_book_medianr
from src.logic.webrequest import BibTextTransformer, TransformerType, WebRequest
from src.shared.logging import log
class AvailChecker(QThread):
    """Worker thread that checks, for each signature link, whether the
    book is shelved in the given semesterapparat and updates the DB.

    Signals:
        updateSignal(str, int): (call number, availability state).
        updateProgress(int, int): (processed links, total links).
    """

    updateSignal = Signal(str, int)
    updateProgress = Signal(int, int)

    def __init__(
        self,
        links: list[str] | None = None,
        appnumber: int | None = None,
        parent=None,
        books: list[dict] | None = None,
    ):
        if links is None:
            links = []
        super().__init__(parent)
        log.info("Starting worker thread")
        log.info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books or []
        log.info(
            f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
        )
        # Pre-create reusable request and transformer to avoid per-item overhead
        self._request = WebRequest().set_apparat(self.appnumber)
        self._rds_transformer = BibTextTransformer(TransformerType.RDS)

    def run(self):
        """Check every link's RDS holdings and persist availability."""
        self.db = Database()
        count = 0
        for link in self.links:
            # FIX: reset per link. `state` was previously initialized once
            # before the loop, so a single hit marked every later link as
            # available too.
            state = 0
            log.info("Processing entry: " + str(link))
            data = self._request.get_ppn(link).get_data()
            rds = self._rds_transformer.get_data(data).return_data("rds_availability")
            book_id = None
            if not rds or not rds.items:
                log.warning(f"No RDS data found for link {link}")
                continue
            for item in rds.items:
                sign = item.superlocation
                loc = item.location
                # available iff a holding lists this apparat's number
                if str(self.appnumber) in sign or str(self.appnumber) in loc:
                    state = 1
                    break
            # FIX: track the matched book explicitly; the old code
            # dereferenced the loop variable `book` even when the search
            # found nothing (NameError on empty self.books, or a stale
            # match from the previous link).
            book = None
            for candidate in self.books:
                if candidate["bookdata"].signature == link:
                    book = candidate
                    book_id = candidate["id"]
                    break
            log.info(f"State of {link}: " + str(state))
            if book is not None:
                # use get_book_medianr to update the medianr of the book in the database
                # NOTE(review): getWebADISAuth is referenced without calling
                # parentheses -- confirm it is a property; otherwise this
                # passes the bound method instead of the credentials.
                auth = self.db.getWebADISAuth
                medianr = get_book_medianr(rds.items[0].callnumber, self.appnumber, auth)
                book_data = book["bookdata"]
                book_data.medianr = medianr
                self.db.updateBookdata(book["id"], book_data)
                self.db.setAvailability(book_id, state)
            count += 1
            self.updateProgress.emit(count, len(self.links))
            self.updateSignal.emit(item.callnumber, state)
        log.info("Worker thread finished")
        self.quit()

View File

@@ -1,35 +0,0 @@
from playwright.sync_api import sync_playwright
def get_book_medianr(signature: str, semesterapparat_nr: int, auth: tuple) -> str:
    """Look up a book's media number in WebADIS via browser automation.

    Logs into aDISWeb, searches the catalogue by *signature* and returns
    the media number of the copy shelved in the given semesterapparat,
    or None when no copy matches.

    Args:
        signature: Shelf mark to search for.
        semesterapparat_nr: Number of the semesterapparat to match.
        auth: (username, password) tuple for the WebADIS login.
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        try:
            page = context.new_page()
            page.goto(
                "https://bsz.ibs-bw.de:22998/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SDAP42"
            )
            page.get_by_role("textbox", name="Benutzer").fill(auth[0])
            page.get_by_role("textbox", name="Benutzer").press("Tab")
            page.get_by_role("textbox", name="Kennwort").fill(auth[1])
            page.get_by_role("textbox", name="Kennwort").press("Enter")
            page.get_by_role("button", name="Katalog").click()
            page.get_by_role("textbox", name="Signatur").click()
            page.get_by_role("textbox", name="Signatur").fill(signature)
            page.get_by_role("textbox", name="Signatur").press("Enter")
            book_list = page.locator("iframe").content_frame.get_by_role(
                "cell", name="Bibliothek der Pädagogischen"
            )
            # this will always find one result, we need to split the resulting text based on the entries that start with "* "
            book_entries = book_list.inner_text().split("\n")
            books = [entry for entry in book_entries if entry.startswith("* ")]
            for book in books:
                if f"Semesterapparat: {semesterapparat_nr}" in book:
                    # e.g. "* 12345678: ..." -> "12345678"
                    return book.split("* ")[1].split(":")[0]
            return None
        finally:
            # FIX: the old code returned from inside the loop before
            # context.close()/browser.close() could run, leaking the
            # browser on every successful lookup.
            context.close()
            browser.close()

View File

@@ -1,20 +1,14 @@
import sys
import time
import loguru
from src.shared.logging import log
# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal
from src import LOG_DIR
from src.database import Database
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# use centralized logging from src.shared.logging
# from src.transformers import RDS_AVAIL_DATA

View File

@@ -28,19 +28,19 @@ class AvailChecker(QThread):
+ str(links)
+ " with appnumber "
+ str(appnumber)
+ "..."
+ "...",
)
self.links = links
self.appnumber = appnumber
self.books = books or []
log.info(
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books...",
)
# Pre-create reusable request and transformer to avoid per-item overhead
self._request = WebRequest().set_apparat(self.appnumber)
self._rds_transformer = BibTextTransformer(TransformerType.RDS)
def run(self):
def run(self) -> None:
self.db = Database()
state = 0
count = 0

View File

@@ -2,7 +2,10 @@ from PySide6.QtCore import QThread, Signal
from src.database import Database
from src.services.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
from src.shared.logging import log, get_bloat_logger, preview
# bloat logger for large/raw payloads
bloat = get_bloat_logger()
# Logger configured centrally in main; this module just uses `log`
@@ -63,12 +66,12 @@ class BookGrabber(QThread):
continue
bd = BibTextTransformer(self.mode)
log.debug(webdata)
bloat.debug("Web response (preview): {}", preview(webdata, 2000))
if self.mode == "ARRAY":
if self.use_exact:
bd = bd.use_signature(entry)
bd = bd.get_data(webdata).return_data()
log.debug(bd)
bloat.debug("Transformed bookdata (preview): {}", preview(bd, 1000))
if bd is None:
# bd = BookData
continue

View File

@@ -1,23 +1,28 @@
from PySide6.QtCore import QThread, Slot
from src.utils.documentation import website, QuietHandler
from wsgiref.simple_server import make_server
from src.utils.documentation import start_documentation_server
class DocumentationThread(QThread):
def __init__(self):
super().__init__()
self._server = None # store server so we can shut it down
self._process = None # store subprocess so we can shut it down
def run(self):
# launch_documentation()
self._server = make_server(
"localhost", 8000, website(), handler_class=QuietHandler
)
while not self.isInterruptionRequested():
self._server.handle_request()
# Start the zensical documentation server
self._process = start_documentation_server()
# Keep thread alive until interruption is requested
if self._process:
while not self.isInterruptionRequested():
self.msleep(100) # Check every 100ms
@Slot() # slot you can connect to aboutToQuit
def stop(self):
self.requestInterruption() # ask the loop above to exit
if self._server:
self._server.shutdown() # unblock handle_request()
if self._process:
self._process.terminate() # terminate the subprocess
try:
self._process.wait(timeout=5) # wait up to 5 seconds
except Exception:
self._process.kill() # force kill if it doesn't stop

View File

@@ -12,7 +12,7 @@ from .models import (
Subjects,
XMLMailSubmission,
)
from .constants import *
from .constants import * # noqa: F403
from .semester import Semester
__all__ = [

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import json
from dataclasses import dataclass, field
from enum import Enum
@@ -5,22 +7,21 @@ from typing import Any, Optional, Union
import regex
from src.logic.openai import name_tester, run_shortener, semester_converter
from src.logic.semester import Semester
from src.core.semester import Semester
@dataclass
class Prof:
id: Optional[int] = None
_title: Optional[str] = None
firstname: Optional[str] = None
lastname: Optional[str] = None
fullname: Optional[str] = None
mail: Optional[str] = None
telnr: Optional[str] = None
id: int | None = None
_title: str | None = None
firstname: str | None = None
lastname: str | None = None
fullname: str | None = None
mail: str | None = None
telnr: str | None = None
# add function that sets the data based on a dict
def from_dict(self, data: dict[str, Union[str, int]]):
def from_dict(self, data: dict[str, Union[str, int]]) -> 'Prof':
for key, value in data.items():
if hasattr(self, key):
setattr(self, key, value)
@@ -37,27 +38,40 @@ class Prof:
self._title = value
# add function that sets the data from a tuple
def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof":
setattr(self, "id", data[0])
setattr(self, "_title", data[1])
setattr(self, "firstname", data[2])
setattr(self, "lastname", data[3])
setattr(self, "fullname", data[4])
setattr(self, "mail", data[5])
setattr(self, "telnr", data[6])
def from_tuple(self, data: tuple[Union[int, str, None], ...]) -> 'Prof':
self.id = data[0] if data[0] is not None and isinstance(data[0], int) else None
self._title = str(data[1]) if data[1] is not None else None
self.firstname = str(data[2]) if data[2] is not None else None
self.lastname = str(data[3]) if data[3] is not None else None
self.fullname = str(data[4]) if data[4] is not None else None
self.mail = str(data[5]) if data[5] is not None else None
self.telnr = str(data[6]) if data[6] is not None else None
return self
def name(self, comma: bool = False) -> Optional[str]:
if self.firstname is None and self.lastname is None:
if "," in self.fullname:
self.firstname = self.fullname.split(",")[1].strip()
self.lastname = self.fullname.split(",")[0].strip()
if self.fullname and "," in self.fullname:
parts = self.fullname.split(",")
if len(parts) >= 2:
self.firstname = parts[1].strip()
self.lastname = parts[0].strip()
else:
return self.fullname
if comma:
return f"{self.lastname}, {self.firstname}"
return f"{self.lastname} {self.firstname}"
if self.lastname and self.firstname:
return f"{self.lastname}, {self.firstname}"
elif self.lastname:
return self.lastname
elif self.firstname:
return f", {self.firstname}"
elif self.lastname and self.firstname:
return f"{self.lastname} {self.firstname}"
elif self.lastname:
return self.lastname
elif self.firstname:
return self.firstname
return self.fullname
@dataclass
@@ -78,7 +92,7 @@ class BookData:
in_apparat: bool | None = False
adis_idn: str | None = None
old_book: Any | None = None
media_type: str | None = None #
media_type: str | None = None
in_library: bool | None = None # whether the book is in the library or not
medianr: int | None = None # Media number in the library system
@@ -89,15 +103,17 @@ class BookData:
if isinstance(self.language, list) and self.language:
self.language = [lang.strip() for lang in self.language if lang.strip()]
self.language = ",".join(self.language)
self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
if self.year is not None:
year_str = regex.sub(r"[^\d]", "", str(self.year))
self.year = int(year_str) if year_str else None
self.in_library = True if self.signature else False
def from_dict(self, data: dict) -> "BookData":
def from_dict(self, data: dict[str, Any]) -> 'BookData':
for key, value in data.items():
setattr(self, key, value)
return self
def merge(self, other: "BookData") -> "BookData":
def merge(self, other: BookData) -> BookData:
for key, value in other.__dict__.items():
# merge lists, if the attribute is a list, extend it
if isinstance(value, list):
@@ -131,24 +147,26 @@ class BookData:
del data_dict["old_book"]
return json.dumps(data_dict, ensure_ascii=False)
def from_dataclass(self, dataclass: Optional[Any]) -> None:
if dataclass is None:
def from_dataclass(self, data_obj: Optional[Any]) -> None:
if data_obj is None:
return
for key, value in dataclass.__dict__.items():
for key, value in data_obj.__dict__.items():
setattr(self, key, value)
def get_book_type(self) -> str:
if "Online" in self.pages:
if self.pages and "Online" in self.pages:
return "eBook"
else:
return "Druckausgabe"
return "Druckausgabe"
def from_string(self, data: str) -> "BookData":
def from_string(self, data: str) -> 'BookData':
ndata = json.loads(data)
# Create a new BookData instance and set its attributes
book_data = BookData()
for key, value in ndata.items():
setattr(book_data, key, value)
return book_data
return BookData(**ndata)
def from_LehmannsSearchResult(self, result: Any) -> "BookData":
def from_LehmannsSearchResult(self, result: Any) -> 'BookData':
self.title = result.title
self.author = "; ".join(result.authors) if result.authors else None
self.edition = str(result.edition) if result.edition else None
@@ -170,7 +188,7 @@ class BookData:
def edition_number(self) -> Optional[int]:
if self.edition is None:
return 0
match = regex.search(r"(\d+)", self.edition)
match = regex.search(r"(\d+)", self.edition or "")
if match:
return int(match.group(1))
return 0
@@ -178,10 +196,10 @@ class BookData:
@dataclass
class MailData:
subject: Optional[str] = None
body: Optional[str] = None
mailto: Optional[str] = None
prof: Optional[str] = None
subject: str | None
body: str | None
mailto: str | None
prof: str | None
class Subjects(Enum):
@@ -215,13 +233,13 @@ class Subjects(Enum):
return self.value[0]
@property
def name(self) -> str:
def subject_name(self) -> str:
return self.value[1]
@classmethod
def get_index(cls, name: str) -> Optional[int]:
for i in cls:
if i.name == name:
if i.subject_name == name:
return i.id - 1
return None
@@ -243,7 +261,7 @@ class Apparat:
prof_id_adis: str | None = None
konto: int | None = None
def from_tuple(self, data: tuple[Any, ...]) -> "Apparat":
def from_tuple(self, data: tuple[Any, ...]) -> Apparat:
self.id = data[0]
self.name = data[1]
self.prof_id = data[2]
@@ -264,8 +282,7 @@ class Apparat:
def get_semester(self) -> Optional[str]:
if self.extend_until is not None:
return self.extend_until
else:
return self.created_semester
return self.created_semester
@dataclass
@@ -275,7 +292,7 @@ class ELSA:
semester: str | None = None
prof_id: int | None = None
def from_tuple(self, data: tuple[Any, ...]) -> "ELSA":
def from_tuple(self, data: tuple[Any, ...]) -> ELSA:
self.id = data[0]
self.date = data[1]
self.semester = data[2]
@@ -291,27 +308,27 @@ class ApparatData:
@dataclass
class XMLMailSubmission:
name: Optional[str] = None
lastname: Optional[str] = None
title: Optional[str] = None
telno: Optional[int] = None
email: Optional[str] = None
app_name: Optional[str] = None
subject: Optional[str] = None
semester: Optional[Semester] = None
books: Optional[list[BookData]] = None
name: str | None
lastname: str | None
title: str | None
telno: int | None
email: str | None
app_name: str | None
subject: str | None
semester: Semester | None
books: list[BookData] | None
@dataclass
class Book:
author: str = None
year: str = None
edition: str = None
title: str = None
location: str = None
publisher: str = None
signature: str = None
internal_notes: str = None
author: str | None
year: str | None
edition: str | None
title: str | None
location: str | None
publisher: str | None
signature: str | None
internal_notes: str | None
@property
def has_signature(self) -> bool:
@@ -329,7 +346,7 @@ class Book:
self.publisher == "",
self.signature == "",
self.internal_notes == "",
]
],
)
def from_dict(self, data: dict[str, Any]):
@@ -356,24 +373,29 @@ class Book:
@dataclass
class SemapDocument:
subject: str = None
phoneNumber: int = None
mail: str = None
title: str = None
title_suggestions: list[str] = None
semester: Union[str, Semester] = None
books: list[Book] = None
subject: str | None = None
phoneNumber: int | None = None
mail: str | None = None
title: str | None = None
personName: str | None = None
personTitle: str | None = None
title_suggestions: list[str] = field(default_factory=list)
semester: Union[str, 'Semester', None] = None
books: list[Book] = field(default_factory=list)
eternal: bool = False
personName: str = None
personTitle: str = None
title_length = 0
title_max_length = 0
title_length: int = 0
title_max_length: int = 0
def __post_init__(self):
self.title_suggestions = []
def __post_init__(self) -> None:
"""."""
if self.phoneNumber is not None:
phone_str = regex.sub(r"[^\d]", "", str(self.phoneNumber))
self.phoneNumber = int(phone_str) if phone_str else None
@property
def nameSetter(self):
from src.services.openai import name_tester, run_shortener
data = name_tester(self.personTitle)
name = f"{data['last_name']}, {data['first_name']}"
if data["title"] is not None:
@@ -389,18 +411,19 @@ class SemapDocument:
self.title_suggestions.append(suggestion["shortened_string"])
else:
self.title_suggestions = []
pass
@property
def renameSemester(self) -> None:
if self.semester:
from src.services.openai import semester_converter
if self.semester and isinstance(self.semester, str):
if ", Dauer" in self.semester:
self.semester = self.semester.split(",")[0]
self.eternal = True
self.semester = Semester().from_string(self.semester)
else:
self.semester = Semester().from_string(
semester_converter(self.semester)
semester_converter(self.semester),
)
@property
@@ -408,3 +431,55 @@ class SemapDocument:
if self.books is not None:
return [book.signature for book in self.books if book.has_signature]
return []
@dataclass
class ELSA_Mono:
    """Bibliographic data for a monograph page range (ELSA).

    NOTE(review): field semantics inferred from names — confirm against callers.
    """

    authorName: str  # author name as supplied in the request
    year: int  # publication year
    signature: str  # presumably the library shelf mark — verify against caller
    page_from: int  # first page of the requested range
    page_to: int  # last page of the requested range
    edition: str | None = None  # edition statement, if any
@dataclass
class ELSA_Journal:
    """Bibliographic data for a journal article (ELSA).

    NOTE(review): field semantics inferred from names — confirm against callers.
    """

    authorName: str  # author of the article
    year: int  # publication year
    issue: str  # issue/volume designation
    page_from: int  # first page of the article
    page_to: int  # last page of the article
    journal_title: str  # title of the journal
    article_title: str  # title of the article
    signature: str  # presumably the library shelf mark — verify against caller
@dataclass
class Person:
    """A person consisting of first/last name and an optional title."""

    firstName: str
    lastName: str
    personTitle: str | None = None  # e.g. an academic title; may be absent

    @property
    def fullName_LNFN(self) -> str:
        """Return the name formatted as ``"<lastName>, <firstName>"``."""
        return f"{self.lastName}, {self.firstName}"
@dataclass
class ELSA_Editorial:
    """Placeholder dataclass — fields not yet defined (see TODO below)."""

    # TODO: add dataclass fields
    pass
@dataclass
class ELSADocument:
mail: str = None
personTitle: str = None
personName: Optional[str] = None
def __post_init__(self) -> None:
"""."""
self.mail = self.mail.strip() if self.mail else None
self.personTitle = self.personTitle.strip() if self.personTitle else None
self.personName = self.personName.strip() if self.personName else None

View File

@@ -1,4 +1,4 @@
"""Semester helper class
"""Semester helper class.
A small utility around the *German* academic calendar that distinguishes
between *Wintersemester* (WiSe) and *Sommersemester* (SoSe).
@@ -7,7 +7,7 @@ Key points
----------
* A **`Semester`** is identified by a *term* ("SoSe" or "WiSe") and the last two
digits of the calendar year in which the term *starts*.
* Formatting **never** pads the year with a leading zero so ``6`` stays ``6``.
* Formatting **never** pads the year with a leading zero - so ``6`` stays ``6``.
* ``offset(n)`` and the static ``generate_missing`` reliably walk the timeline
one semester at a time with correct year transitions:
@@ -26,13 +26,13 @@ class Semester:
"""Represents a German university semester (WiSe or SoSe)."""
# ------------------------------------------------------------------
# Classlevel defaults will be *copied* to each instance and then
# Class-level defaults - will be *copied* to each instance and then
# potentially overwritten in ``__init__``.
# ------------------------------------------------------------------
_year: int | None = int(str(datetime.datetime.now().year)[2:]) # 24 → 24
_semester: str | None = None # "WiSe" or "SoSe" set later
_month: int | None = datetime.datetime.now().month
value: str | None = None # Humanreadable label, e.g. "WiSe 23/24"
_year: int | None = None # Will be set in __post_init__
_semester: str | None = None # "WiSe" or "SoSe" - set later
_month: int | None = None # Will be set in __post_init__
value: str | None = None # Human-readable label, e.g. "WiSe 23/24"
# ------------------------------------------------------------------
# Construction helpers
@@ -54,11 +54,23 @@ class Semester:
self.__post_init__()
def __post_init__(self) -> None: # noqa: D401 keep original name
if self._year is None:
self._year = int(str(datetime.datetime.now().year)[2:])
def __post_init__(self) -> None:
now = datetime.datetime.now()
if self._month is None:
self._month = datetime.datetime.now().month
self._month = now.month
if self._year is None:
# Extract last 2 digits of current year
current_year = int(str(now.year)[2:])
# For winter semester in Jan-Mar, we need to use the previous year
# because WiSe started in October of the previous calendar year
if self._month <= 3:
self._year = (current_year - 1) % 100
else:
self._year = current_year
if self._semester is None:
self._generate_semester_from_month()
self._compute_value()
@@ -66,7 +78,7 @@ class Semester:
# ------------------------------------------------------------------
# Dunder helpers
# ------------------------------------------------------------------
def __str__(self) -> str: # noqa: D401 keep original name
def __str__(self) -> str:
return self.value or "<invalid Semester>"
def __repr__(self) -> str: # Helpful for debugging lists
@@ -77,21 +89,27 @@ class Semester:
# ------------------------------------------------------------------
def _generate_semester_from_month(self) -> None:
"""Infer *WiSe* / *SoSe* from the month attribute."""
self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe"
if self._month is not None:
self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe"
else:
self._semester = "WiSe" # Default value if month is None
def _compute_value(self) -> None:
"""Humanreadable semester label e.g. ``WiSe 23/24`` or ``SoSe 24``."""
year = self._year
if self._semester == "WiSe":
next_year = (year + 1) % 100 # wrap 99 → 0
self.value = f"WiSe {year}/{next_year}"
else: # SoSe
self.value = f"SoSe {year}"
"""Human-readable semester label - e.g. ``WiSe 23/24`` or ``SoSe 24``."""
if self._year is not None:
year = self._year
if self._semester == "WiSe":
next_year = (year + 1) % 100 # wrap 99 → 0
self.value = f"WiSe {year}/{next_year}"
else: # SoSe
self.value = f"SoSe {year}"
else:
self.value = "<invalid Semester>"
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def offset(self, value: int) -> "Semester":
def offset(self, value: int) -> Semester:
"""Return a new :class:`Semester` *value* steps away.
The algorithm maps every semester to a monotonically increasing
@@ -104,10 +122,12 @@ class Semester:
if value == 0:
return Semester(self._year, self._semester)
if self._year is None:
raise ValueError("Cannot offset from a semester with no year")
current_idx = self._year * 2 + (0 if self._semester == "SoSe" else 1)
target_idx = current_idx + value
if target_idx < 0:
raise ValueError("offset would result in a negative year not supported")
raise ValueError("offset would result in a negative year - not supported")
new_year, semester_bit = divmod(target_idx, 2)
new_semester = "SoSe" if semester_bit == 0 else "WiSe"
@@ -116,7 +136,7 @@ class Semester:
# ------------------------------------------------------------------
# Comparison helpers
# ------------------------------------------------------------------
def isPastSemester(self, current: "Semester") -> bool:
def is_past_semester(self, current: Semester) -> bool:
log.debug(f"Comparing {self} < {current}")
if self.year < current.year:
return True
@@ -126,7 +146,7 @@ class Semester:
) # WiSe before next SoSe
return False
def isFutureSemester(self, current: "Semester") -> bool:
def is_future_semester(self, current: Semester) -> bool:
if self.year > current.year:
return True
if self.year == current.year:
@@ -135,44 +155,48 @@ class Semester:
) # SoSe after WiSe of same year
return False
def isMatch(self, other: "Semester") -> bool:
def is_match(self, other: Semester) -> bool:
return self.year == other.year and self.semester == other.semester
# ------------------------------------------------------------------
# Convenience properties
# ------------------------------------------------------------------
@property
def next(self) -> "Semester":
def next(self) -> Semester:
return self.offset(1)
@property
def previous(self) -> "Semester":
def previous(self) -> Semester:
return self.offset(-1)
@property
def year(self) -> int:
if self._year is None:
raise ValueError("Year is not set for this semester")
return self._year
@property
def semester(self) -> str:
if self._semester is None:
raise ValueError("Semester is not set for this semester")
return self._semester
# ------------------------------------------------------------------
# Static helpers
# ------------------------------------------------------------------
@staticmethod
def generate_missing(start: "Semester", end: "Semester") -> list[str]:
def generate_missing(start: Semester, end: Semester) -> list[str]:
"""Return all consecutive semesters from *start* to *end* (inclusive)."""
if not isinstance(start, Semester) or not isinstance(end, Semester):
raise TypeError("start and end must be Semester instances")
if start.isFutureSemester(end) and not start.isMatch(end):
if start.is_future_semester(end) and not start.is_match(end):
raise ValueError("'start' must not be after 'end'")
chain: list[Semester] = [start.value]
chain: list[str] = [str(start)]
current = start
while not current.isMatch(end):
while not current.is_match(end):
current = current.next
chain.append(current.value)
chain.append(str(current))
if len(chain) > 1000: # sanity guard
raise RuntimeError("generate_missing exceeded sane iteration limit")
return chain
@@ -181,10 +205,10 @@ class Semester:
# Parsing helper
# ------------------------------------------------------------------
@classmethod
def from_string(cls, s: str) -> "Semester":
"""Parse a humanreadable semester label and return a :class:`Semester`.
def from_string(cls, s: str) -> Semester:
"""Parse a human-readable semester label and return a :class:`Semester`.
Accepted formats (caseinsensitive)::
Accepted formats (case-insensitive)::
"SoSe <YY>" → SoSe of year YY
"WiSe <YY>/<YY+1>" → Winter term starting in YY
@@ -199,7 +223,7 @@ class Semester:
m = re.fullmatch(pattern, s, flags=re.IGNORECASE)
if not m:
raise ValueError(
"invalid semester string format expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)"
"invalid semester string format - expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)",
)
term_raw, y1_str, y2_str = m.groups()
@@ -209,7 +233,7 @@ class Semester:
if term == "SoSe":
if y2_str is not None:
raise ValueError(
"SoSe string should not contain '/' followed by a second year"
"SoSe string should not contain '/' followed by a second year",
)
return cls(year, "SoSe")
@@ -223,7 +247,7 @@ class Semester:
return cls(year, "WiSe")
# ------------------------- quick selftest -------------------------
# ------------------------- quick self-test -------------------------
if __name__ == "__main__":
# Chain generation demo ------------------------------------------------
s_start = Semester(6, "SoSe") # SoSe 6
@@ -245,4 +269,5 @@ if __name__ == "__main__":
]
for ex in examples:
parsed = Semester.from_string(ex)
print(f"'{ex}'{parsed} ({parsed.year=}, {parsed.semester=})")
# debug: demonstration output (disabled)
# print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})")

View File

@@ -28,7 +28,7 @@ from src.errors import AppPresentError, NoResultError
from src.core.models import ELSA, Apparat, ApparatData, BookData, Prof
from src.core.constants import SEMAP_MEDIA_ACCOUNTS
from src.core.semester import Semester
from src.shared.logging import log
from src.shared.logging import log, get_bloat_logger, preview
from src.utils.blob import create_blob
ascii_lowercase = lower + digits + punctuation
@@ -123,7 +123,7 @@ class Database:
try:
if self.db_path is not None:
self.run_migrations()
except Exception as e:
except (sql.Error, OSError, IOError) as e:
log.error(f"Error while running migrations: {e}")
# --- Migration helpers integrated into Database ---
@@ -212,9 +212,9 @@ class Database:
).__str__()
return result[0]
def getElsaMediaType(self, id):
def getElsaMediaType(self, media_id):
query = "SELECT type FROM elsa_media WHERE id=?"
return self.query_db(query, (id,), one=True)[0]
return self.query_db(query, (media_id,), one=True)[0]
def get_db_contents(self) -> Union[List[Tuple[Any]], None]:
"""
@@ -732,7 +732,12 @@ class Database:
str: The filename of the recreated file
"""
blob = self.getBlob(filename, app_id)
log.debug(blob)
bloat = get_bloat_logger()
try:
bloat.debug("Recreated file blob size: {} bytes", len(blob))
bloat.debug("Recreated file blob (preview): {}", preview(blob, 2000))
except (TypeError, UnicodeDecodeError, ValueError):
bloat.debug("Recreated file blob (preview): {}", preview(blob, 2000))
tempdir = settings.database.temp.expanduser()
if not tempdir.exists():
tempdir.mkdir(parents=True, exist_ok=True)
@@ -985,16 +990,16 @@ class Database:
person = Prof()
return person.from_tuple(data)
def getProf(self, id) -> Prof:
def getProf(self, prof_id) -> Prof:
"""Get a professor based on the id
Args:
id ([type]): the id of the professor
prof_id ([type]): the id of the professor
Returns:
Prof: a Prof object containing the data of the professor
"""
data = self.query_db("SELECT * FROM prof WHERE id=?", (id,), one=True)
data = self.query_db("SELECT * FROM prof WHERE id=?", (prof_id,), one=True)
return Prof().from_tuple(data)
def getProfs(self) -> list[Prof]:
@@ -1145,22 +1150,25 @@ class Database:
Returns:
Optional[int]: the id of the apparat
"""
log.debug(apparat)
bloat = get_bloat_logger()
bloat.debug("Apparat (preview): {}", preview(apparat, 500))
app = apparat.apparat
prof = apparat.prof
present_prof = self.getProfByName(prof.name())
prof_id = present_prof.id
log.debug(present_prof)
bloat.debug("Present prof: {}", preview(present_prof, 300))
app_id = self.getApparatId(app.name)
if app_id:
return AppPresentError(app_id)
if not prof_id:
log.debug("prof id not present, creating prof with data", prof)
bloat.debug(
"prof id not present, creating prof with data: {}", preview(prof, 300)
)
prof_id = self.createProf(prof)
log.debug(prof_id)
log.debug("prof_id: {}", preview(prof_id, 50))
query = f"INSERT OR IGNORE INTO semesterapparat (appnr, name, erstellsemester, dauer, prof_id, fach,deletion_status,konto) VALUES ('{app.appnr}', '{app.name}', '{app.created_semester}', '{app.eternal}', {prof_id}, '{app.subject}', '{0}', '{SEMAP_MEDIA_ACCOUNTS[app.appnr]}')"
log.debug(query)
log.debug("Apparat insert query: {}", preview(query, 500))
self.query_db(query)
return None
@@ -1178,7 +1186,7 @@ class Database:
)
ret = []
for i in data:
log.debug(i)
log.debug("Apparat row: {}", preview(i, 200))
ret.append(Apparat().from_tuple(i))
return ret
@@ -1270,17 +1278,17 @@ class Database:
# print(apparat_nr, app_id)
self.query_db("UPDATE media SET deleted=1 WHERE app_id=?", (app_id,))
def isEternal(self, id):
def isEternal(self, apparat_id):
"""check if the apparat is eternal (dauerapparat)
Args:
id (int): the id of the apparat to be checked
apparat_id (int): the id of the apparat to be checked
Returns:
int: the state of the apparat
"""
return self.query_db(
"SELECT dauer FROM semesterapparat WHERE appnr=?", (id,), one=True
"SELECT dauer FROM semesterapparat WHERE appnr=?", (apparat_id,), one=True
)
def getApparatName(self, app_id: Union[str, int], prof_id: Union[str, int]):
@@ -1318,7 +1326,11 @@ class Database:
apparat_data.apparat.apparat_id_adis,
apparat_data.apparat.appnr,
)
log.debug(f"Updating apparat with query {query} and params {params}")
log.debug(
"Updating apparat: query: {} params: {}",
preview(query, 200),
preview(params, 300),
)
self.query_db(query, params)
def checkApparatExists(self, app_name: str):
@@ -1792,7 +1804,8 @@ class Database:
###
def createProf(self, profdata: Prof):
log.debug(profdata)
bloat = get_bloat_logger()
bloat.debug("Creating profdata: {}", preview(profdata, 500))
conn = self.connect()
cursor = conn.cursor()
fname = profdata.firstname
@@ -1803,7 +1816,7 @@ class Database:
title = profdata.title
query = "INSERT INTO prof (fname, lname, fullname, mail, telnr, titel) VALUES (?,?,?,?,?,?)"
log.debug(query)
log.debug("DB query: {}", preview(query, 200))
cursor.execute(query, (fname, lname, fullname, mail, telnr, title))
conn.commit()
@@ -1848,7 +1861,7 @@ class Database:
else:
fullname = profdata.name()
query = "SELECT id FROM prof WHERE fullname = ?"
log.debug(query)
log.debug("DB query: {}", preview(query, 200))
cursor.execute(query, (fullname,))
result = cursor.fetchone()
@@ -1866,7 +1879,7 @@ class Database:
conn = self.connect()
cursor = conn.cursor()
query = "SELECT * FROM prof WHERE fullname = ?"
log.debug(query)
log.debug("DB query: {}", preview(query, 200))
result = cursor.execute(query, (fullname,)).fetchone()
if result:

View File

@@ -1,2 +1 @@

View File

@@ -1,6 +1,7 @@
import os
from datetime import datetime
from os.path import basename
from pathlib import Path
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
@@ -69,20 +70,20 @@ class SemesterDocument:
full: bool = False,
):
assert isinstance(apparats, list), SemesterError(
"Apparats must be a list of tuples"
"Apparats must be a list of tuples",
)
assert all(isinstance(apparat, tuple) for apparat in apparats), SemesterError(
"Apparats must be a list of tuples"
"Apparats must be a list of tuples",
)
assert all(isinstance(apparat[0], int) for apparat in apparats), SemesterError(
"Apparat numbers must be integers"
"Apparat numbers must be integers",
)
assert all(isinstance(apparat[1], str) for apparat in apparats), SemesterError(
"Apparat names must be strings"
"Apparat names must be strings",
)
assert isinstance(semester, str), SemesterError("Semester must be a string")
assert "." not in filename and isinstance(filename, str), SemesterError(
"Filename must be a string and not contain an extension"
"Filename must be a string and not contain an extension",
)
self.doc = Document()
self.apparats = apparats
@@ -108,8 +109,7 @@ class SemesterDocument:
log.info("Document printed")
def set_table_border(self, table):
"""
Adds a full border to the table.
"""Adds a full border to the table.
:param table: Table object to which the border will be applied.
"""
@@ -150,7 +150,8 @@ class SemesterDocument:
trPr = row._tr.get_or_add_trPr() # Get or add the <w:trPr> element
trHeight = OxmlElement("w:trHeight")
trHeight.set(
qn("w:val"), str(int(Pt(15).pt * 20))
qn("w:val"),
str(int(Pt(15).pt * 20)),
) # Convert points to twips
trHeight.set(qn("w:hRule"), "exact") # Use "exact" for fixed height
trPr.append(trHeight)
@@ -233,7 +234,7 @@ class SemesterDocument:
self.save_document(self.filename + ".docx")
docpath = os.path.abspath(self.filename + ".docx")
doc = word.Documents.Open(docpath)
curdir = os.getcwd()
curdir = Path.cwd()
doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17)
doc.Close()
word.Quit()
@@ -317,7 +318,7 @@ class SemapSchilder:
self.save_document()
docpath = os.path.abspath(f"{self.filename}.docx")
doc = word.Documents.Open(docpath)
curdir = os.getcwd()
curdir = Path.cwd()
doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17)
doc.Close()
word.Quit()

View File

@@ -1,2 +1,2 @@
# import basic error classes
from .DatabaseErrors import *
from .DatabaseErrors import * # noqa: F403

View File

@@ -1,631 +0,0 @@
import re
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Iterable, List, Optional, Tuple, Union
import requests
from requests.adapters import HTTPAdapter
# centralized logging used via src.shared.logging
from src.logic.dataclass import BookData
from src.shared.logging import log
log # ensure imported logger is referenced
# -----------------------
# Dataclasses
# -----------------------
# --- MARC XML structures ---
@dataclass
class ControlField:
    """A MARC21 control field: a tag with a single flat value (no subfields)."""

    tag: str  # MARC tag, e.g. "001"
    value: str  # raw text content of the field
@dataclass
class SubField:
    """One subfield of a MARC data field: a one-character code and its value."""

    code: str  # subfield code, e.g. "a"
    value: str  # subfield text content
@dataclass
class DataField:
    """A MARC21 data field: tag, two indicator characters and its subfields."""

    tag: str  # MARC tag, e.g. "245"
    ind1: str = " "  # first indicator; blank when unset
    ind2: str = " "  # second indicator; blank when unset
    subfields: List[SubField] = field(default_factory=list)
@dataclass
class MarcRecord:
    """A parsed MARC21 record: leader plus control fields and data fields."""

    leader: str  # the record leader text (may be empty if missing)
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)
# --- SRU record wrapper ---
@dataclass
class Record:
    """SRU ``zs:record`` wrapper holding one parsed MARC record."""

    recordSchema: str  # schema reported by the server, e.g. "marcxml"
    recordPacking: str  # packing reported by the server
    recordData: MarcRecord  # the embedded, parsed MARC21 record
    recordPosition: int  # 1-based position within the result set (0 if absent)
@dataclass
class EchoedSearchRequest:
    """The request parameters an SRU server echoes back in its response."""

    version: str
    query: str
    maximumRecords: int  # 0 when missing or unparseable
    recordPacking: str
    recordSchema: str
@dataclass
class SearchRetrieveResponse:
    """Top-level result of parsing an SRU ``searchRetrieveResponse``."""

    version: str
    numberOfRecords: int  # total hit count reported by the server
    records: List[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
# -----------------------
# Parser
# -----------------------
# XML namespace URIs: the SRU ("zs") wrapper and embedded MARC21-slim records.
ZS = "http://www.loc.gov/zing/srw/"
MARC = "http://www.loc.gov/MARC21/slim"
NS = {"zs": ZS, "marc": MARC}  # prefix map for namespace-aware find()/findall()
def _text(elem: Optional[ET.Element]) -> str:
    """Return the element's text content, or ``""`` if the element or text is missing."""
    if elem is None:
        return ""
    return elem.text or ""
def _req_text(parent: ET.Element, path: str) -> Optional[str]:
    """Find *path* under *parent* (namespace-aware) and return its text, else ``None``."""
    found = parent.find(path, NS)
    if found is None:
        return None
    return found.text  # may still be None if the element is empty
def parse_marc_record(record_el: ET.Element) -> MarcRecord:
    """Convert a ``<marc:record>`` element into a :class:`MarcRecord`.

    *record_el* is the MARC record element found inside ``zs:recordData``.
    """
    # Control fields: tag attribute plus flat text content.
    controls = [
        ControlField(tag=cf.get("tag", "").strip(), value=_text(cf))
        for cf in record_el.findall("marc:controlfield", NS)
    ]

    # Data fields: tag, both indicators (blank when absent) and all subfields.
    fields: List[DataField] = []
    for df in record_el.findall("marc:datafield", NS):
        subs = [
            SubField(code=sf.get("code", ""), value=_text(sf))
            for sf in df.findall("marc:subfield", NS)
        ]
        fields.append(
            DataField(
                tag=df.get("tag", "").strip(),
                ind1=df.get("ind1") or " ",
                ind2=df.get("ind2") or " ",
                subfields=subs,
            )
        )

    return MarcRecord(
        leader=_req_text(record_el, "marc:leader") or "",
        controlfields=controls,
        datafields=fields,
    )
def parse_record(zs_record_el: ET.Element) -> Record:
    """Parse one ``zs:record`` element (SRU envelope + embedded MARC record).

    Raises:
        ValueError: if ``zs:recordData`` or the MARC record inside it is missing.
    """
    data_el = zs_record_el.find("zs:recordData", NS)
    if data_el is None:
        raise ValueError("Missing zs:recordData")

    # Searching with the ns-qualified name covers both prefixed and
    # default-namespace MARC records.
    marc_el = data_el.find("marc:record", NS)
    if marc_el is None:
        raise ValueError("Missing MARC21 record inside zs:recordData")

    return Record(
        recordSchema=_req_text(zs_record_el, "zs:recordSchema") or "",
        recordPacking=_req_text(zs_record_el, "zs:recordPacking") or "",
        recordData=parse_marc_record(marc_el),
        recordPosition=int(_req_text(zs_record_el, "zs:recordPosition") or "0"),
    )
def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
    """Parse ``zs:echoedSearchRetrieveRequest`` if present; tolerant of gaps.

    Returns ``None`` when the element is absent; missing subfields become
    empty strings and an unparseable ``maximumRecords`` becomes 0.
    """
    echoed = root.find("zs:echoedSearchRetrieveRequest", NS)
    if echoed is None:
        return None

    raw_max = _text(echoed.find("zs:maximumRecords", NS)) or "0"
    try:
        max_records = int(raw_max)
    except ValueError:
        max_records = 0

    return EchoedSearchRequest(
        version=_text(echoed.find("zs:version", NS)),
        query=_text(echoed.find("zs:query", NS)),
        maximumRecords=max_records,
        recordPacking=_text(echoed.find("zs:recordPacking", NS)),
        recordSchema=_text(echoed.find("zs:recordSchema", NS)),
    )
def parse_search_retrieve_response(
    xml_str: Union[str, bytes],
) -> SearchRetrieveResponse:
    """Parse a complete SRU ``searchRetrieveResponse`` XML document.

    Args:
        xml_str: Raw XML payload as ``str`` or ``bytes``.

    Returns:
        A :class:`SearchRetrieveResponse` with every contained record parsed.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_str* is not well-formed XML.
        ValueError: propagated from :func:`parse_record` for malformed records.
    """
    root = ET.fromstring(xml_str)

    # Root is zs:searchRetrieveResponse. Fall back to "" so the version field
    # (annotated as str on SearchRetrieveResponse) never holds None — this
    # matches the fallbacks used for every other field in this parser.
    version = _req_text(root, "zs:version") or ""
    number_of_records = int(_req_text(root, "zs:numberOfRecords") or "0")

    records_parent = root.find("zs:records", NS)
    records: List[Record] = (
        [parse_record(r) for r in records_parent.findall("zs:record", NS)]
        if records_parent is not None
        else []
    )

    return SearchRetrieveResponse(
        version=version,
        numberOfRecords=number_of_records,
        records=records,
        echoedSearchRetrieveRequest=parse_echoed_request(root),
    )
# --- Query helpers over MarcRecord ---
def iter_datafields(
    rec: MarcRecord,
    tag: Optional[str] = None,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> Iterable[DataField]:
    """Yield the record's datafields, filtered by tag/indicators when given.

    A filter value of ``None`` means "accept any value" for that attribute.
    """
    constraints = (("tag", tag), ("ind1", ind1), ("ind2", ind2))
    for df in rec.datafields:
        if all(want is None or getattr(df, attr) == want for attr, want in constraints):
            yield df
def subfield_values(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[str]:
    """Collect every value of subfield *code* across all matching *tag* fields
    (respecting the optional indicator filters)."""
    return [
        sf.value
        for df in iter_datafields(rec, tag, ind1, ind2)
        for sf in df.subfields
        if sf.code == code
    ]
def first_subfield_value(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """Return the first value of subfield *code* in *tag* fields, or *default*."""
    candidates = (
        sf.value
        for df in iter_datafields(rec, tag, ind1, ind2)
        for sf in df.subfields
        if sf.code == code
    )
    return next(candidates, default)
def find_datafields_with_subfields(
    rec: MarcRecord,
    tag: str,
    *,
    where_all: Optional[Dict[str, str]] = None,
    where_any: Optional[Dict[str, str]] = None,
    casefold: bool = False,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[DataField]:
    """Return datafields of *tag* whose subfields satisfy the constraints.

    - ``where_all``: every (code -> exact value) pair must be present.
    - ``where_any``: at least one (code -> exact value) pair must be present.

    Set ``casefold=True`` for case-insensitive comparison.
    """
    must_have = where_all or {}
    should_have = where_any or {}

    def norm(text: str) -> str:
        # Normalisation applied to both stored values and constraint values.
        return text.casefold() if casefold else text

    def present(values: Dict[str, List[str]], code: str, value: str) -> bool:
        return norm(value) in values.get(code, [])

    hits: List[DataField] = []
    for df in iter_datafields(rec, tag, ind1, ind2):
        # Map code -> list of (normalised) values for this field.
        values: Dict[str, List[str]] = {}
        for sf in df.subfields:
            values.setdefault(sf.code, []).append(norm(sf.value))

        if not all(present(values, c, v) for c, v in must_have.items()):
            continue
        if should_have and not any(
            present(values, c, v) for c, v in should_have.items()
        ):
            continue
        hits.append(df)
    return hits
def controlfield_value(
    rec: MarcRecord, tag: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the first controlfield value for *tag* (e.g. '001'), or *default*."""
    return next((cf.value for cf in rec.controlfields if cf.tag == tag), default)
def datafields_value(
    data: List[DataField], code: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the first value of subfield *code* across *data*, or *default*."""
    matches = (sf.value for df in data for sf in df.subfields if sf.code == code)
    return next(matches, default)
def datafield_value(
    df: DataField, code: str, default: Optional[str] = None
) -> Optional[str]:
    """Return the first value of subfield *code* within *df*, or *default*."""
    return next((sf.value for sf in df.subfields if sf.code == code), default)
def _smart_join_title(a: str, b: Optional[str]) -> str:
    """Join 245 $a and $b with MARC-style punctuation.

    If $b is present the two parts are joined with ``' : '``, unless either
    side already carries punctuation (``:``, ``;`` or ``/``) — then a plain
    space is used.
    """
    main = a.strip()
    if not b:
        return main
    sub = b.strip()
    already_punctuated = main.endswith((":", ";", "/")) or sub.startswith(
        (":", ";", "/")
    )
    separator = " " if already_punctuated else " : "
    return f"{main}{separator}{sub}"
def subfield_values_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[str]:
    """All subfield values with the given *code* across *fields*, in order."""
    collected: List[str] = []
    for df in fields:
        collected.extend(sf.value for sf in df.subfields if sf.code == code)
    return collected
def first_subfield_value_from_fields(
    fields: Iterable[DataField],
    code: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """First subfield value with the given *code* across *fields*, or *default*."""
    return next(
        (sf.value for df in fields for sf in df.subfields if sf.code == code),
        default,
    )
def subfield_value_pairs_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[Tuple[DataField, str]]:
    """
    Return (DataField, value) pairs for all subfields with `code`.
    Useful if you need to know which field a value came from.
    """
    return [
        (df, sf.value)
        for df in fields
        for sf in df.subfields
        if sf.code == code
    ]
def book_from_marc(rec: MarcRecord) -> BookData:
    """Map a parsed MARC record onto the application's BookData dataclass.

    Field mapping (MARC tag $subfield -> BookData attribute):
    001 -> ppn, 245 $a/$b -> title, 924 $g (where $9 == "Frei 129") -> signature,
    264 $c -> year, 020 $a -> isbn, 338 $a -> media_type, 041 $a -> language,
    700 $a -> author, 250 $a -> edition, 300 $a -> pages, 264 $b -> publisher.
    """
    # PPN from controlfield 001
    ppn = controlfield_value(rec, "001")
    # Title = 245 $a + 245 $b (if present)
    t_a = first_subfield_value(rec, "245", "a")
    t_b = first_subfield_value(rec, "245", "b")
    title = _smart_join_title(t_a, t_b) if t_a else None
    # Signature = 924 where $9 == "Frei 129" → take that field's $g
    frei_fields = find_datafields_with_subfields(
        rec, "924", where_all={"9": "Frei 129"}
    )
    signature = first_subfield_value_from_fields(frei_fields, "g")
    # Year = 264 $c (prefer ind2="1" publication; fallback to any 264)
    year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value(
        rec, "264", "c"
    )
    isbn = subfield_values(rec, "020", "a")
    mediatype = first_subfield_value(rec, "338", "a")
    lang = subfield_values(rec, "041", "a")
    authors = subfield_values(rec, "700", "a")
    author = None
    if authors:
        # Multiple 700 entries are collapsed into one '; '-separated string.
        author = "; ".join(authors)
    return BookData(
        ppn=ppn,
        title=title,
        signature=signature,
        edition=first_subfield_value(rec, "250", "a") or "",
        year=year,
        pages=first_subfield_value(rec, "300", "a") or "",
        publisher=first_subfield_value(rec, "264", "b") or "",
        isbn=isbn,
        language=lang,
        link="",
        author=author,
        media_type=mediatype,
    )
class SWBData(Enum):
    """SRU endpoint configuration for the SWB (K10plus) catalogue."""

    # URL template; '{}' receives the already URL-encoded query string.
    URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"
    # Index prefix this endpoint expects on query arguments (e.g. "pica.tit").
    ARGSCHEMA = "pica."
    NAME = "SWB"
class DNBData(Enum):
    """SRU endpoint configuration for the DNB (Deutsche Nationalbibliothek)."""

    # URL template; '{}' receives the already URL-encoded query string.
    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"
    # DNB takes unprefixed query arguments.
    ARGSCHEMA = ""
    NAME = "DNB"
class SRUSite(Enum):
    """Available SRU endpoints, each mapped to its configuration enum."""

    SWB = SWBData
    DNB = DNBData
# Regex character class for characters allowed in RVK call-number notations.
RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK notations
def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    Additional guards & preferences:
      - If both have signatures and they differ, skip (not the same work).
      - For duplicates (same ppn): keep the one that has a signature, and
        prefer a signature that matches swb_result.signature.
      - If multiple remain: keep the single 'latest' by (year desc,
        edition_number desc, best-signature-match desc, has-signature desc).

    Returns a single-element list with the best candidate, or None when no
    DNB record qualifies as a newer edition of the same work.
    """

    def norm_sig(s: Optional[str]) -> str:
        # Canonical form used for all signature comparisons below.
        if not s:
            return ""
        # normalize: lowercase, collapse whitespace, keep alnum + a few separators
        s = s.lower()
        s = re.sub(r"\s+", " ", s).strip()
        # remove obvious noise; adjust if your signature format differs
        s = re.sub(r"[^a-z0-9\-_/\. ]+", "", s)
        return s

    def has_sig(b: BookData) -> bool:
        return bool(getattr(b, "signature", None))

    def sig_matches_swb(b: BookData) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return norm_sig(b.signature) == norm_sig(swb_result.signature)

    def strictly_newer(b: BookData) -> bool:
        # NOTE(review): BookData.__post_init__ coerces year to a digit *string*,
        # so '>' compares lexicographically here — fine for equal-length
        # 4-digit years, wrong otherwise. Confirm inputs are 4-digit years.
        by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        return by_year or by_edition

    swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))

    # 1) Filter to same-work AND newer
    candidates: List[BookData] = []
    for b in dnb_result:
        # Skip if both signatures exist and don't match (different work)
        b_sig = getattr(b, "signature", None)
        if b_sig and swb_result.signature:
            if norm_sig(b_sig) != swb_sig_norm:
                continue  # not the same work
        # Keep only if newer by rules
        if strictly_newer(b):
            candidates.append(b)
    if not candidates:
        return None

    # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
    by_ppn: dict[Optional[str], BookData] = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None:
            by_ppn[key] = b
            continue

        # Compute preference score for both
        def ppn_pref_score(x: BookData) -> tuple[int, int]:
            # (signature matches swb, has signature)
            return (1 if sig_matches_swb(x) else 0, 1 if has_sig(x) else 0)

        if ppn_pref_score(b) > ppn_pref_score(prev):
            by_ppn[key] = b
    deduped = list(by_ppn.values())
    if not deduped:
        return None

    # 3) If multiple remain, keep only the latest one.
    #    Order: year desc, edition_number desc, signature-match desc, has-signature desc
    def sort_key(b: BookData):
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        sig_match = 1 if sig_matches_swb(b) else 0
        sig_present = 1 if has_sig(b) else 0
        return (year, ed, sig_match, sig_present)

    best = max(deduped, key=sort_key)
    return [best] if best else None
class Api:
    """Minimal SRU client for MARC-XML endpoints (SWB / DNB)."""

    def __init__(self, site: str, url: str, prefix: str):
        self.site = site
        self.url = url  # URL template with one '{}' slot for the encoded query
        self.prefix = prefix
        # Reuse TCP connections across requests for better performance
        self._session = requests.Session()
        # Slightly larger connection pool for concurrent calls
        adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

    def close(self):
        """Release the pooled HTTP connections (idempotent, best-effort)."""
        try:
            self._session.close()
        except Exception:
            pass

    def __del__(self):
        # Best-effort cleanup
        self.close()

    def get(self, query_args: Iterable[str]) -> List[Record]:
        """Run an SRU searchRetrieve query and return the parsed records.

        Raises:
            ValueError: for DNB queries with no usable (non-pica) search term.
            Exception: when the server does not answer with HTTP 200.
        """
        if self.site == "DNB":
            # DNB does not understand pica.* indexes; drop them.
            args = [arg for arg in query_args if not arg.startswith("pica.")]
            if not args:
                raise ValueError("DNB queries must include at least one search term")
            query_args = args
        query = "+and+".join(query_args)
        # Minimal escaping for the two characters that break the URL template.
        query = query.replace(" ", "%20").replace("&", "%26")
        url = self.url.format(query)
        log.debug(url)
        headers = {
            "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
            "Accept": "application/xml",
            "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
        }
        # Use persistent session and set timeouts to avoid hanging
        resp = self._session.get(url, headers=headers, timeout=(3.05, 60))
        if resp.status_code != 200:
            # BUGFIX: report the site actually queried instead of hardcoding "SWB".
            raise Exception(
                f"Error fetching data from {self.site}: {resp.status_code}"
            )
        # Parse using raw bytes (original behavior) to preserve encoding edge cases
        sr = parse_search_retrieve_response(resp.content)
        return sr.records

    def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
        """Fetch records for `query_args` and convert them to BookData.

        When a 'pica.tit=' argument is present, the result list is narrowed
        to books whose title starts with that term (case-insensitive).
        """
        # BUGFIX: materialize once — `query_args` may be a one-shot iterator
        # and is iterated again below for the title filter.
        query_args = list(query_args)
        records: List[Record] = self.get(query_args)
        # Avoid printing on hot paths; rely on logger if needed
        log.debug(f"{self.site} found {len(records)} records for args={query_args}")
        books: List[BookData] = [book_from_marc(rec.recordData) for rec in records]
        # extract title from query_args if present
        title = None
        for arg in query_args:
            if arg.startswith("pica.tit="):
                # BUGFIX: split only on the first '=' so search terms that
                # themselves contain '=' are not truncated.
                title = arg.split("=", 1)[1]
                break
        if title:
            books = [
                b
                for b in books
                if b.title and b.title.lower().startswith(title.lower())
            ]
        return books

    def getLinkForBook(self, book: BookData) -> str:
        # Not implemented: depends on catalog front-end; return empty string for now
        return ""
class SWB(Api):
    """Api subclass preconfigured with the SWB SRU endpoint settings."""

    def __init__(self):
        # Delegate straight to Api; the base initializer stores site/url/prefix.
        super().__init__(
            SWBData.NAME.value,
            SWBData.URL.value,
            SWBData.ARGSCHEMA.value,
        )

View File

@@ -1,35 +1,8 @@
"""Sorting utilities for semester data."""
from .c_sort import custom_sort, sort_semesters_list
__all__ = [
"custom_sort",
"sort_semesters_list",
"APP_NRS",
"PROF_TITLES",
"SEMAP_MEDIA_ACCOUNTS",
"csv_to_list",
"ELSA",
"Apparat",
"ApparatData",
"BookData",
"Prof",
"Semester",
"SemapDocument",
"elsa_word_to_csv",
"pdf_to_semap",
"word_docx_to_csv",
"word_to_semap",
"ZoteroController",
"eml_to_semap",
]
from .c_sort import custom_sort, sort_semesters_list
from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
from .csvparser import csv_to_list
from .dataclass import ELSA, Apparat, ApparatData, BookData, Prof
from .semester import Semester
from .wordparser import (
SemapDocument,
elsa_word_to_csv,
pdf_to_semap,
word_docx_to_csv,
word_to_semap,
)
from .xmlparser import eml_to_semap
from .zotero import ZoteroController

View File

@@ -1,213 +0,0 @@
# Valid apparat numbers: 1..180 inclusive.
# Idiom: list(range(...)) instead of an identity comprehension (ruff C416).
APP_NRS = list(range(1, 181))
# Academic titles selectable for a professor; "Kein Titel" (German: "no title")
# is the explicit no-title option.
PROF_TITLES = [
    "Dr. mult.",
    "Dr. paed.",
    "Dr. rer. pol.",
    "Dr. sc. techn.",
    "Drs.",
    "Dr. agr.",
    "Dr. habil.",
    "Dr. oec.",
    "Dr. med.",
    "Dr. e. h.",
    "Dr. oec. publ.",
    "Dr. -Ing.",
    "Dr. theol.",
    "Dr. med. vet.",
    "Dr. ing.",
    "Dr. rer. nat.",
    "Dr. des.",
    "Dr. sc. mus.",
    "Dr. h. c.",
    "Dr. pharm.",
    "Dr. med. dent.",
    "Dr. phil. nat.",
    "Dr. phil.",
    "Dr. iur.",
    "Dr.",
    "Kein Titel",
]
# Mapping: apparat number (1..180, cf. APP_NRS) -> media account number in the
# library system. The account numbers are fixed, externally assigned data —
# do not regenerate them programmatically.
SEMAP_MEDIA_ACCOUNTS = {
    1: "1008000055",
    2: "1008000188",
    3: "1008000211",
    4: "1008000344",
    5: "1008000477",
    6: "1008000500",
    7: "1008000633",
    8: "1008000766",
    9: "1008000899",
    10: "1008000922",
    11: "1008001044",
    12: "1008001177",
    13: "1008001200",
    14: "1008001333",
    15: "1008001466",
    16: "1008001599",
    17: "1008001622",
    18: "1008001755",
    19: "1008001888",
    20: "1008001911",
    21: "1008002033",
    22: "1008002166",
    23: "1008002299",
    24: "1008002322",
    25: "1008002455",
    26: "1008002588",
    27: "1008002611",
    28: "1008002744",
    29: "1008002877",
    30: "1008002900",
    31: "1008003022",
    32: "1008003155",
    33: "1008003288",
    34: "1008003311",
    35: "1008003444",
    36: "1008003577",
    37: "1008003600",
    38: "1008003733",
    39: "1008003866",
    40: "1008003999",
    41: "1008004011",
    42: "1008004144",
    43: "1008004277",
    44: "1008004300",
    45: "1008004433",
    46: "1008004566",
    47: "1008004699",
    48: "1008004722",
    49: "1008004855",
    50: "1008004988",
    51: "1008005000",
    52: "1008005133",
    53: "1008005266",
    54: "1008005399",
    55: "1008005422",
    56: "1008005555",
    57: "1008005688",
    58: "1008005711",
    59: "1008005844",
    60: "1008005977",
    61: "1008006099",
    62: "1008006122",
    63: "1008006255",
    64: "1008006388",
    65: "1008006411",
    66: "1008006544",
    67: "1008006677",
    68: "1008006700",
    69: "1008006833",
    70: "1008006966",
    71: "1008007088",
    72: "1008007111",
    73: "1008007244",
    74: "1008007377",
    75: "1008007400",
    76: "1008007533",
    77: "1008007666",
    78: "1008007799",
    79: "1008007822",
    80: "1008007955",
    81: "1008008077",
    82: "1008008100",
    83: "1008008233",
    84: "1008008366",
    85: "1008008499",
    86: "1008008522",
    87: "1008008655",
    88: "1008008788",
    89: "1008008811",
    90: "1008008944",
    91: "1008009066",
    92: "1008009199",
    93: "1008009222",
    94: "1008009355",
    95: "1008009488",
    96: "1008009511",
    97: "1008009644",
    98: "1008009777",
    99: "1008009800",
    100: "1008009933",
    101: "1008010022",
    102: "1008010155",
    103: "1008010288",
    104: "1008010311",
    105: "1008010444",
    106: "1008010577",
    107: "1008010600",
    108: "1008010733",
    109: "1008010866",
    110: "1008010999",
    111: "1008011011",
    112: "1008011144",
    113: "1008011277",
    114: "1008011300",
    115: "1008011433",
    116: "1008011566",
    117: "1008011699",
    118: "1008011722",
    119: "1008011855",
    120: "1008011988",
    121: "1008012000",
    122: "1008012133",
    123: "1008012266",
    124: "1008012399",
    125: "1008012422",
    126: "1008012555",
    127: "1008012688",
    128: "1008012711",
    129: "1008012844",
    130: "1008012977",
    131: "1008013099",
    132: "1008013122",
    133: "1008013255",
    134: "1008013388",
    135: "1008013411",
    136: "1008013544",
    137: "1008013677",
    138: "1008013700",
    139: "1008013833",
    140: "1008013966",
    141: "1008014088",
    142: "1008014111",
    143: "1008014244",
    144: "1008014377",
    145: "1008014400",
    146: "1008014533",
    147: "1008014666",
    148: "1008014799",
    149: "1008014822",
    150: "1008014955",
    151: "1008015077",
    152: "1008015100",
    153: "1008015233",
    154: "1008015366",
    155: "1008015499",
    156: "1008015522",
    157: "1008015655",
    158: "1008015788",
    159: "1008015811",
    160: "1008015944",
    161: "1008016066",
    162: "1008016199",
    163: "1008016222",
    164: "1008016355",
    165: "1008016488",
    166: "1008016511",
    167: "1008016644",
    168: "1008016777",
    169: "1008016800",
    170: "1008016933",
    171: "1008017055",
    172: "1008017188",
    173: "1008017211",
    174: "1008017344",
    175: "1008017477",
    176: "1008017500",
    177: "1008017633",
    178: "1008017766",
    179: "1008017899",
    180: "1008017922",
}

View File

@@ -1,23 +0,0 @@
import csv
from charset_normalizer import detect
def csv_to_list(path: str) -> list[str]:
    """
    Extract the first column of a semicolon-delimited CSV file.

    The file's encoding is auto-detected first; literal double quotes are
    stripped from the returned values.

    BUGFIX: the docstring previously claimed a pandas DataFrame was returned,
    and the encoding-probe file handle was never closed.
    """
    # Detect the encoding with a short-lived handle (previously leaked).
    with open(path, "rb") as raw:
        encoding = detect(raw.read())["encoding"]
    with open(path, newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        return [row[0].replace('"', "") for row in reader]
if __name__ == "__main__":
text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
# remove linebreaks
# #print(text)

View File

@@ -1,410 +0,0 @@
import json
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional, Union
import regex
from src.logic.openai import name_tester, run_shortener, semester_converter
from src.logic.semester import Semester
@dataclass
class Prof:
    """A professor / lecturer record as stored in the database."""

    id: Optional[int] = None
    _title: Optional[str] = None  # raw title column; access via the `title` property
    firstname: Optional[str] = None
    lastname: Optional[str] = None
    fullname: Optional[str] = None
    mail: Optional[str] = None
    telnr: Optional[str] = None

    def from_dict(self, data: dict[str, Union[str, int]]) -> "Prof":
        """Set every attribute present in `data`; unknown keys are ignored."""
        for key, value in data.items():
            if hasattr(self, key):
                setattr(self, key, value)
        return self

    @property
    def title(self) -> str:
        """Academic title; empty string when unset or stored as the string "None"."""
        if self._title is None or self._title == "None":
            return ""
        return self._title

    @title.setter
    def title(self, value: str):
        self._title = value

    def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof":
        """Populate all fields from a database row (positional column order)."""
        self.id = data[0]
        self._title = data[1]
        self.firstname = data[2]
        self.lastname = data[3]
        self.fullname = data[4]
        self.mail = data[5]
        self.telnr = data[6]
        return self

    def name(self, comma: bool = False) -> Optional[str]:
        """Return "Last First" (or "Last, First" when `comma` is True).

        Falls back to `fullname`, splitting it on a comma when present.
        Returns None when no name information is available at all.
        """
        if self.firstname is None and self.lastname is None:
            # BUGFIX: guard against fullname being None before the "," test
            # (previously raised TypeError on a fully empty Prof).
            if self.fullname and "," in self.fullname:
                self.firstname = self.fullname.split(",")[1].strip()
                self.lastname = self.fullname.split(",")[0].strip()
            else:
                return self.fullname
        if comma:
            return f"{self.lastname}, {self.firstname}"
        return f"{self.lastname} {self.firstname}"
@dataclass
class BookData:
    """A bibliographic record merged from the various catalogue/shop sources."""

    ppn: str | None = None  # record identifier (taken from MARC controlfield 001)
    title: str | None = None
    signature: str | None = None  # library call number / shelf mark
    edition: str | None = None  # free-text edition statement; see edition_number
    link: str | None = None
    isbn: Union[str, list[str], None] = field(default_factory=list)
    author: str | None = None  # '; '-separated when multiple authors
    language: Union[str, list[str], None] = field(default_factory=list)
    publisher: str | None = None
    place: str | None = None
    year: int | None = None  # NOTE: __post_init__ coerces this to a digit string
    pages: str | None = None
    library_location: str | None = None
    in_apparat: bool | None = False
    adis_idn: str | None = None
    old_book: Any | None = None  # previous version of this record, excluded from to_dict
    media_type: str | None = None  #
    in_library: bool | None = None  # whether the book is in the library or not
    medianr: int | None = None  # Media number in the library system

    def __post_init__(self):
        """Normalize loosely-typed inputs right after construction."""
        self.library_location = (
            str(self.library_location) if self.library_location else None
        )
        if isinstance(self.language, list) and self.language:
            self.language = [lang.strip() for lang in self.language if lang.strip()]
            self.language = ",".join(self.language)
        # Keep only digit characters of the year (e.g. "[2020]" -> "2020").
        # NOTE: the result is a *string* despite the `int | None` annotation.
        self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
        # Presence of a signature is treated as "held by the library".
        self.in_library = True if self.signature else False

    def from_dict(self, data: dict) -> "BookData":
        """Overwrite attributes from `data` (no key filtering) and return self."""
        for key, value in data.items():
            setattr(self, key, value)
        return self

    def merge(self, other: "BookData") -> "BookData":
        """Fill gaps in self from `other`; list attributes are unioned in order."""
        for key, value in other.__dict__.items():
            # merge lists, if the attribute is a list, extend it
            if isinstance(value, list):
                current_value = getattr(self, key)
                if current_value is None:
                    current_value = []
                elif not isinstance(current_value, list):
                    current_value = [current_value]
                # extend the list with the new values, but only if they are not already in the list
                for v in value:
                    if v not in current_value:
                        current_value.append(v)
                setattr(self, key, current_value)
            # Scalars only fill empty (None or "") attributes — existing
            # values on self win.
            if value is not None and (
                getattr(self, key) is None or getattr(self, key) == ""
            ):
                setattr(self, key, value)
        # in language, drop all entries that are longer than 3 characters
        if isinstance(self.language, list):
            self.language = [lang for lang in self.language if len(lang) <= 4]
        return self

    @property
    def to_dict(self) -> str:
        """Serialize all non-None attributes (except old_book) to a JSON string.

        NOTE: despite the name, this returns a JSON *string*, not a dict.
        """
        data_dict = {
            key: value for key, value in self.__dict__.items() if value is not None
        }
        # remove old_book from data_dict
        if "old_book" in data_dict:
            del data_dict["old_book"]
        return json.dumps(data_dict, ensure_ascii=False)

    def from_dataclass(self, dataclass: Optional[Any]) -> None:
        """Copy every attribute of another dataclass instance onto self."""
        if dataclass is None:
            return
        for key, value in dataclass.__dict__.items():
            setattr(self, key, value)

    def get_book_type(self) -> str:
        """Classify as eBook vs. print based on the pages/extent statement.

        NOTE(review): raises TypeError when `pages` is None — callers must
        ensure `pages` is set.
        """
        if "Online" in self.pages:
            return "eBook"
        else:
            return "Druckausgabe"

    def from_string(self, data: str) -> "BookData":
        """Build a new BookData from a JSON string (self is not modified)."""
        ndata = json.loads(data)
        return BookData(**ndata)

    def from_LehmannsSearchResult(self, result: Any) -> "BookData":
        """Populate self from a Lehmanns shop search result and return self."""
        self.title = result.title
        self.author = "; ".join(result.authors) if result.authors else None
        self.edition = str(result.edition) if result.edition else None
        self.link = result.url
        self.isbn = (
            result.isbn13
            if isinstance(result.isbn13, list)
            else [result.isbn13]
            if result.isbn13
            else []
        )
        self.pages = str(result.pages) if result.pages else None
        self.publisher = result.publisher
        self.year = str(result.year) if result.year else None
        # self.pages = str(result.pages) if result.pages else None
        return self

    @property
    def edition_number(self) -> Optional[int]:
        """First integer found in the edition statement; 0 when none (never None)."""
        if self.edition is None:
            return 0
        match = regex.search(r"(\d+)", self.edition)
        if match:
            return int(match.group(1))
        return 0
@dataclass
class MailData:
    """Parameters of an outgoing notification mail."""

    subject: Optional[str] = None
    body: Optional[str] = None
    mailto: Optional[str] = None  # recipient ("mailto") address
    prof: Optional[str] = None
class Subjects(Enum):
    """Teaching subjects; each value is (database id, German display name)."""

    BIOLOGY = (1, "Biologie")
    CHEMISTRY = (2, "Chemie")
    GERMAN = (3, "Deutsch")
    ENGLISH = (4, "Englisch")
    PEDAGOGY = (5, "Erziehungswissenschaft")
    FRENCH = (6, "Französisch")
    GEOGRAPHY = (7, "Geographie")
    HISTORY = (8, "Geschichte")
    HEALTH_EDUCATION = (9, "Gesundheitspädagogik")
    HTW = (10, "Haushalt / Textil")
    ART = (11, "Kunst")
    MATH_IT = (12, "Mathematik / Informatik")
    MEDIAPEDAGOGY = (13, "Medien in der Bildung")
    MUSIC = (14, "Musik")
    PHILOSOPHY = (15, "Philosophie")
    PHYSICS = (16, "Physik")
    POLITICS = (17, "Politikwissenschaft")
    PRORECTORATE = (18, "Prorektorat Lehre und Studium")
    PSYCHOLOGY = (19, "Psychologie")
    SOCIOLOGY = (20, "Soziologie")
    SPORT = (21, "Sport")
    TECHNIC = (22, "Technik")
    THEOLOGY = (23, "Theologie")
    ECONOMICS = (24, "Wirtschaftslehre")

    @property
    def id(self) -> int:
        # Database id component of the value tuple.
        return self.value[0]

    @property
    def name(self) -> str:
        # NOTE(review): this deliberately shadows Enum.name — members' `.name`
        # yields the German display name (e.g. "Biologie"), NOT the member
        # identifier ("BIOLOGY"). Confirm all callers expect this.
        return self.value[1]

    @classmethod
    def get_index(cls, name: str) -> Optional[int]:
        """Return the zero-based index (id - 1) for a German display name."""
        for i in cls:
            # Compares against the display-name property defined above.
            if i.name == name:
                return i.id - 1
        return None
@dataclass
class Apparat:
id: int | None = None
name: str | None = None
prof_id: int | None = None
subject: str | None = None
appnr: int | None = None
created_semester: str | None = None
extended_at: str | None = None
eternal: bool = False
extend_until: str | None = None
deleted: int | None = None
deleted_date: str | None = None
apparat_id_adis: str | None = None
prof_id_adis: str | None = None
konto: int | None = None
def from_tuple(self, data: tuple[Any, ...]) -> "Apparat":
self.id = data[0]
self.name = data[1]
self.prof_id = data[2]
self.subject = data[3]
self.appnr = data[4]
self.created_semester = data[5]
self.extended_at = data[6]
self.eternal = data[7]
self.extend_until = data[8]
self.deleted = data[9]
self.deleted_date = data[10]
self.apparat_id_adis = data[11]
self.prof_id_adis = data[12]
self.konto = data[13]
return self
@property
def get_semester(self) -> Optional[str]:
if self.extend_until is not None:
return self.extend_until
else:
return self.created_semester
@dataclass
class ELSA:
id: int | None = None
date: str | None = None
semester: str | None = None
prof_id: int | None = None
def from_tuple(self, data: tuple[Any, ...]) -> "ELSA":
self.id = data[0]
self.date = data[1]
self.semester = data[2]
self.prof_id = data[3]
return self
@dataclass
class ApparatData:
    """Aggregates a professor together with one of their apparats."""

    prof: Prof = field(default_factory=Prof)
    apparat: Apparat = field(default_factory=Apparat)
@dataclass
class XMLMailSubmission:
    """Payload of an apparat request submitted via the XML mail form."""

    name: Optional[str] = None
    lastname: Optional[str] = None
    title: Optional[str] = None
    telno: Optional[int] = None
    email: Optional[str] = None
    app_name: Optional[str] = None
    subject: Optional[str] = None
    semester: Optional[Semester] = None
    books: Optional[list[BookData]] = None
@dataclass
class Book:
    """A book row parsed from the Word request-form tables (all free text)."""

    author: str | None = None
    year: str | None = None
    edition: str | None = None
    title: str | None = None
    location: str | None = None
    publisher: str | None = None
    signature: str | None = None
    internal_notes: str | None = None

    @property
    def has_signature(self) -> bool:
        """True when a non-empty signature (call number) is present."""
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        """True when every field is the empty string.

        NOTE(review): defaults are None, not "", so a freshly constructed
        Book is NOT considered empty — only a Book populated from an
        all-blank form row is. Confirm this is intended.
        """
        return all(
            [
                self.author == "",
                self.year == "",
                self.edition == "",
                self.title == "",
                self.location == "",
                self.publisher == "",
                self.signature == "",
                self.internal_notes == "",
            ]
        )

    def from_dict(self, data: dict[str, Any]):
        """Populate fields from a form-table row keyed by German column headers."""
        for key, value in data.items():
            value = value.strip()
            # A run of EN SPACE characters is the form's "blank" placeholder.
            if value == "\u2002\u2002\u2002\u2002\u2002":
                value = ""
            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                # Combined "year/edition" cell, split on the slash.
                self.year = value.split("/")[0] if "/" in value else value
                self.edition = value.split("/")[1] if "/" in value else ""
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                # Combined "place, publisher" cell, split on the comma.
                self.location = value.split(",")[0] if "," in value else value
                self.publisher = value.split(",")[1] if "," in value else ""
            elif key == "Standnummer":
                self.signature = value.strip()
            elif key == "Interne Vermerke":
                self.internal_notes = value
@dataclass
class SemapDocument:
    """Parsed Semesterapparat request form plus derived/normalized metadata."""

    subject: str | None = None
    phoneNumber: int | None = None
    mail: str | None = None
    title: str | None = None
    title_suggestions: list[str] | None = None  # reset to [] in __post_init__
    semester: Union[str, Semester] = None
    books: list[Book] | None = None
    eternal: bool = False
    personName: str | None = None  # "Last, First"; set by nameSetter
    personTitle: str | None = None  # raw input; replaced by academic title in nameSetter
    # Plain class attributes (not dataclass fields); recomputed by nameSetter.
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        self.title_suggestions = []

    @property
    def nameSetter(self):
        """Split personTitle into name/title via OpenAI and, when the combined
        label exceeds 40 characters, fetch shortened title suggestions.

        NOTE(review): despite being a property, this mutates personTitle,
        personName, title_length, title_max_length and title_suggestions.
        """
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        if data["title"] is not None:
            title = data["title"]
            self.personTitle = title
        self.personName = name
        # Label length = title + " - " (3 chars) + last name.
        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
        if self.title_length > 40:
            name_len = len(self.personName.split(",")[0])
            self.title_max_length = 38 - name_len
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []
            pass

    @property
    def renameSemester(self) -> None:
        """Normalize self.semester into a Semester instance.

        A ", Dauer" suffix marks an eternal apparat; otherwise the raw string
        is converted via OpenAI (semester_converter) before parsing.
        """
        if self.semester:
            if ", Dauer" in self.semester:
                self.semester = self.semester.split(",")[0]
                self.eternal = True
                self.semester = Semester().from_string(self.semester)
            else:
                self.semester = Semester().from_string(
                    semester_converter(self.semester)
                )

    @property
    def signatures(self) -> list[str]:
        """All non-empty signatures of the requested books ([] when no books)."""
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []

View File

@@ -1,45 +0,0 @@
import csv
import pandas as pd
from docx import Document
def csv_to_list(path: str) -> list[str]:
    """
    Extract the first column of a semicolon-delimited CSV file.

    Literal double quotes are stripped from the returned values.

    BUGFIX: the docstring previously claimed a pandas DataFrame was returned;
    the function has always returned a list of strings.
    """
    with open(path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # Only column 0 is kept; quote stripping on other columns was a no-op.
        return [row[0].replace('"', "") for row in reader]
def word_docx_to_csv(path) -> pd.DataFrame:
    """Extract the third table of a Word (.docx) document as a DataFrame.

    Every table in the document is converted (first row becomes the column
    header), but only the table at index 2 is returned.

    NOTE(review): this assumes the document contains at least three tables
    and that index 2 is the relevant one — confirm against the form template.
    """
    doc = Document(path)
    tables = doc.tables
    m_data = []
    for table in tables:
        data = []
        for row in table.rows:
            row_data = []
            for cell in row.cells:
                text = cell.text
                # Cell text may contain hard line breaks; flatten them.
                text = text.replace("\n", "")
                row_data.append(text)
            data.append(row_data)
        df = pd.DataFrame(data)
        # Promote the first table row to the header and drop it from the body.
        df.columns = df.iloc[0]
        df = df.iloc[1:]
        m_data.append(df)
    df = m_data[2]
    return df

View File

@@ -1,312 +0,0 @@
from __future__ import annotations
import re
from dataclasses import asdict, dataclass, field
from typing import Iterable, List, Optional
from urllib.parse import quote_plus, urljoin
import httpx
from bs4 import BeautifulSoup
from src.logic.dataclass import BookData
BASE = "https://www.lehmanns.de"
SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="
@dataclass
class LehmannsSearchResult:
    """One result card from the Lehmanns quick-search listing,
    optionally enriched with detail-page data (see LehmannsClient.enrich_pages)."""

    title: str
    url: str
    # Core fields from the listing card
    year: Optional[int] = None
    edition: Optional[int] = None
    publisher: Optional[str] = None
    isbn13: Optional[str] = None
    # Extras from the listing card
    description: Optional[str] = None
    authors: list[str] = field(default_factory=list)
    media_type: Optional[str] = None
    book_format: Optional[str] = None
    price_eur: Optional[float] = None
    currency: str = "EUR"
    image: Optional[str] = None
    # From detail page:
    pages: Optional[str] = None  # "<N> Seiten"
    buyable: bool = True  # set in enrich_pages (detail page)
    unavailable_hint: Optional[str] = (
        None  # e.g. "Titel ist leider vergriffen; keine Neuauflage"
    )

    def to_dict(self) -> dict:
        """Return all fields as a plain dict (dataclasses.asdict)."""
        return asdict(self)
class LehmannsClient:
    """Scrapes quick-search results, then enriches (and filters) via product pages."""

    def __init__(self, timeout: float = 20.0):
        # Browser-like headers; plain defaults tend to get blocked by shops.
        self.client = httpx.Client(
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
                ),
                "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
            timeout=timeout,
            follow_redirects=True,
        )

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    # ------------------- Search (listing) -------------------
    def build_search_url(self, title: str) -> str:
        """Build the quick-search URL for `title`."""
        # spaces -> '+'
        return SEARCH_URL + quote_plus(title)

    def search_by_title(
        self,
        title: str,
        limit: Optional[int] = None,
        strict: bool = False,
        only_latest: bool = True,
    ) -> List[BookData]:
        """
        Parse the listing page only (no availability check here).
        Use enrich_pages(...) afterwards to fetch detail pages, add 'pages',
        and drop unbuyable items.
        """
        url = self.build_search_url(title=title)
        html = self._get(url)
        if not html:
            return []
        results = self._parse_results(html)
        # NOTE(review): the filtered list enrich_pages() returns is discarded;
        # only its in-place mutations (pages/buyable) take effect, so
        # unbuyable items are NOT dropped here — confirm intended.
        self.enrich_pages(results)
        results = [BookData().from_LehmannsSearchResult(r) for r in results]
        if strict:
            # filter results to only those with exact title match (case-insensitive)
            title_lower = title.lower()
            results = [r for r in results if r.title and r.title.lower() == title_lower]
            # results = [r for r in results if r.buyable]
            # NOTE(review): the strict path returns here, so `limit` and
            # `only_latest` are never applied to strict searches — confirm.
            return results
        if limit is not None:
            results = results[: max(0, limit)]
        if only_latest and len(results) > 1:
            # keep only the latest edition (highest edition number)
            results.sort(key=lambda r: (r.edition_number or 0), reverse=True)
            results = [results[0]]
        return results

    # ------------------- Detail enrichment & filtering -------------------
    def enrich_pages(
        self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
    ) -> List[LehmannsSearchResult]:
        """
        Fetch each result.url, extract:
          - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
          - availability: from <li class="availability-3">...</li>
            * if it contains "Titel ist leider vergriffen", mark buyable=False
            * if it also contains "keine Neuauflage", set unavailable_hint accordingly
        If drop_unbuyable=True, exclude non-buyable results from the returned list.

        Mutates the passed result objects in place in addition to returning
        the (possibly filtered) list.
        """
        enriched: List[LehmannsSearchResult] = []
        for r in results:
            try:
                html = self._get(r.url)
                if not html:
                    # Can't verify; keep as-is when not dropping, else skip
                    if not drop_unbuyable:
                        enriched.append(r)
                    continue
                soup = BeautifulSoup(html, "html.parser")  # type: ignore
                # Pages (several selector variants to survive markup changes)
                pages_node = soup.select_one(  # type: ignore
                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
                    ".meta-seiten [itemprop='numberOfPages'], "
                    ".meta-seiten[itemprop='numberOfPages'], "
                    ".book-meta.meta-seiten"
                )
                if pages_node:
                    text = pages_node.get_text(" ", strip=True)
                    m = re.search(r"\d+", text)
                    if m:
                        r.pages = f"{m.group(0)} Seiten"
                # Availability via li.availability-3
                avail_li = soup.select_one("li.availability-3")  # type: ignore
                if avail_li:
                    avail_text = " ".join(
                        avail_li.get_text(" ", strip=True).split()
                    ).lower()
                    if "titel ist leider vergriffen" in avail_text:
                        r.buyable = False
                        if "keine neuauflage" in avail_text:
                            r.unavailable_hint = (
                                "Titel ist leider vergriffen; keine Neuauflage"
                            )
                        else:
                            r.unavailable_hint = "Titel ist leider vergriffen"
                # Append or drop
                if (not drop_unbuyable) or r.buyable:
                    enriched.append(r)
            except Exception:
                # On any per-item error, keep the record if not dropping; else skip
                if not drop_unbuyable:
                    enriched.append(r)
                continue
        return enriched

    # ------------------- Internals -------------------
    def _get(self, url: str) -> Optional[str]:
        """GET `url`; return the HTML body, or None on error / non-HTML answer."""
        try:
            r = self.client.get(url)
            r.encoding = "utf-8"
            if r.status_code == 200 and "text/html" in (
                r.headers.get("content-type") or ""
            ):
                return r.text
        except httpx.HTTPError:
            pass
        return None

    def _parse_results(self, html: str) -> List[LehmannsSearchResult]:
        """Parse the quick-search listing HTML into LehmannsSearchResult objects.

        All extraction below is best-effort: missing markup simply leaves the
        corresponding field at its default.
        """
        soup = BeautifulSoup(html, "html.parser")
        results: list[LehmannsSearchResult] = []
        for block in soup.select("div.info-block"):
            a = block.select_one(".title a[href]")
            if not a:
                continue
            url = urljoin(BASE, a["href"].strip())
            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
                strip=True
            )
            # Alternative headline => extend title
            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
            title = (
                f"{base_title} : {alternative_headline}"
                if alternative_headline
                else base_title
            )
            description = alternative_headline
            # Authors from .author
            authors: list[str] = []
            author_div = block.select_one("div.author")  # type: ignore
            if author_div:
                t = author_div.get_text(" ", strip=True)
                # Strip the leading German "von " and split on common separators.
                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
                for part in re.split(r"\s*;\s*|\s*&\s*|\s+und\s+", t):
                    name = " ".join(part.split())
                    if name:
                        authors.append(name)
            # Media + format
            media_type = None
            book_format = None
            type_text = block.select_one(".type")  # type: ignore
            if type_text:
                t = type_text.get_text(" ", strip=True)
                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
                if m:
                    media_type = m.group(1)
                fm = re.search(r"\(([^)]+)\)", t)
                if fm:
                    book_format = fm.group(1).strip().upper()
            # Year
            year = None
            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
            if y:
                try:
                    year = int(y.get_text(strip=True))
                except ValueError:
                    pass
            # Edition
            edition = None
            ed = block.select_one("[itemprop='bookEdition']")  # type: ignore
            if ed:
                m = re.search(r"\d+", ed.get_text(strip=True))
                if m:
                    edition = int(m.group())
            # Publisher
            publisher = None
            pub = block.select_one(  # type: ignore
                ".publisherprop [itemprop='name']"
            ) or block.select_one(".publisher [itemprop='name']")  # type: ignore
            if pub:
                publisher = pub.get_text(strip=True)
            # ISBN-13
            isbn13 = None
            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")  # type: ignore
            if isbn_tag:
                digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
                # ISBN-13 always starts with 978/979.
                m = re.search(r"(97[89]\d{10})", digits)
                if m:
                    isbn13 = m.group(1)
            # Price (best effort)
            price_eur = None
            txt = block.get_text(" ", strip=True)
            # German price format, e.g. "1.234,56 €".
            mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", txt)
            if not mprice and block.parent:
                sib = block.parent.get_text(" ", strip=True)
                mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", sib)
            if mprice:
                num = mprice.group(1).replace(".", "").replace(",", ".")
                try:
                    price_eur = float(num)
                except ValueError:
                    pass
            # Image (best-effort)
            image = None
            left_img = block.find_previous("img")  # type: ignore
            if left_img and left_img.get("src"):
                image = urljoin(BASE, left_img["src"])
            results.append(
                LehmannsSearchResult(
                    title=title,
                    url=url,
                    description=description,
                    authors=authors,
                    media_type=media_type,
                    book_format=book_format,
                    year=year,
                    edition=edition,
                    publisher=publisher,
                    isbn13=isbn13,
                    price_eur=price_eur,
                    image=image,
                )
            )
        return results

View File

@@ -1,58 +0,0 @@
import json
from typing import Any
from openai import OpenAI
from src import settings
def init_client() -> OpenAI:
    """Create the module-wide OpenAI client from the configured credentials.

    Reads the API key and model name from ``settings.openAI``, stores them in
    the module-level ``client`` / ``model`` / ``api_key`` globals and returns
    the freshly constructed client.

    Raises:
        ValueError: if the API key or the model name is missing.
    """
    global client, model, api_key
    cfg = settings.openAI
    if not cfg.api_key:
        raise ValueError("OpenAI API key is not set in the configuration.")
    if not cfg.model:
        raise ValueError("OpenAI model is not set in the configuration.")
    model = cfg.model
    api_key = cfg.api_key
    client = OpenAI(api_key=api_key)
    return client
def run_shortener(title: str, length: int) -> list[dict[str, Any]]:
    """Ask the model for up to five shortened variants of *title*.

    Args:
        title: The string to shorten.
        length: Maximum allowed length of each variant.

    Returns:
        A list of ``{"shortened_string": str, "length": int}`` dicts as
        produced by the model.

    Raises:
        ValueError: if the model reply is not a valid list literal.
    """
    import ast  # local import: only needed to parse the model reply safely

    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit.
You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit.
based on that, please reply only the shortened string. Give me 5 choices. if the length is too long, discard the string and try another one.Return the data as a python list containing the result as {"shortened_string": shortened_string, "length": lengthasInt}. Do not return the answer in a codeblock, use a pure string. Before answering, check the results and if ANY is longer than the needed_length, discard all and try again""",
        input=f'{{"string":"{title}", "needed_length":{length}}}',
    )
    answers = response.output_text
    # The reply is expected to be a Python list literal. Use ast.literal_eval
    # instead of eval(): literal_eval only accepts literals, so a malicious or
    # malformed model reply cannot execute arbitrary code.
    try:
        return ast.literal_eval(answers)  # type: ignore
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"Unexpected model reply: {answers!r}") from err
def name_tester(name: str) -> dict:
    """Split *name* into title / first name / last name via the model.

    Returns:
        The parsed JSON reply with the keys ``title``, ``first_name`` and
        ``last_name`` (``title`` may be None).
    """
    api = init_client()
    reply = api.responses.create(  # type: ignore
        model=model,
        instructions="""you are a name tester, You are given a name and will have to split the name into first name, last name, and if present the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOt return the answer in a codeblock, use a pure json string. Assume the names are in the usual german naming scheme""",
        input=f'{{"name":"{name}"}}',
    )
    return json.loads(reply.output_text)
def semester_converter(semester: str) -> str:
    """Normalize a free-form semester string to ``SoSe YY`` / ``WiSe YY/YY+1``."""
    api = init_client()
    reply = api.responses.create(  # type: ignore
        model=model,
        instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""",
        input=semester,
    )
    return reply.output_text

View File

@@ -1,23 +0,0 @@
# add depend path to system path
from pdfquery import PDFQuery
def pdf_to_csv(path: str) -> str:
    """
    Extract all text from the PDF at *path* and return it as one string.

    NOTE(review): the name and the previous docstring promise CSV / a pandas
    DataFrame, but the function actually returns the raw concatenated text.
    """
    file = PDFQuery(path)
    file.load()
    # get the text from the pdf file
    text_elems = file.extract([("with_formatter", "text"), ("all_text", "*")])
    extracted_text = text_elems["all_text"]
    return extracted_text
if __name__ == "__main__":
    # Quick manual check: extract the sample PDF and strip all linebreaks.
    extracted = pdf_to_csv("54_pdf.pdf").replace("\n", "")

View File

@@ -1,248 +0,0 @@
"""Semester helper class
A small utility around the *German* academic calendar that distinguishes
between *Wintersemester* (WiSe) and *Sommersemester* (SoSe).
Key points
----------
* A **`Semester`** is identified by a *term* ("SoSe" or "WiSe") and the last two
digits of the calendar year in which the term *starts*.
* Formatting **never** pads the year with a leading zero so ``6`` stays ``6``.
* ``offset(n)`` and the static ``generate_missing`` reliably walk the timeline
one semester at a time with correct year transitions:
SoSe 6 → **WiSe 6/7** → SoSe 7 → WiSe 7/8 → …
"""
from __future__ import annotations
import datetime
import re
from src.shared.logging import log
class Semester:
"""Represents a German university semester (WiSe or SoSe)."""
# ------------------------------------------------------------------
# Classlevel defaults will be *copied* to each instance and then
# potentially overwritten in ``__init__``.
# ------------------------------------------------------------------
_year: int | None = int(str(datetime.datetime.now().year)[2:]) # 24 → 24
_semester: str | None = None # "WiSe" or "SoSe" set later
_month: int | None = datetime.datetime.now().month
value: str | None = None # Humanreadable label, e.g. "WiSe 23/24"
# ------------------------------------------------------------------
# Construction helpers
# ------------------------------------------------------------------
def __init__(
self,
year: int | None = None,
semester: str | None = None,
month: int | None = None,
) -> None:
if year is not None:
self._year = int(year)
if semester is not None:
if semester not in ("WiSe", "SoSe"):
raise ValueError("semester must be 'WiSe' or 'SoSe'")
self._semester = semester
if month is not None:
self._month = month
self.__post_init__()
def __post_init__(self) -> None: # noqa: D401 keep original name
if self._year is None:
self._year = int(str(datetime.datetime.now().year)[2:])
if self._month is None:
self._month = datetime.datetime.now().month
if self._semester is None:
self._generate_semester_from_month()
self._compute_value()
# ------------------------------------------------------------------
# Dunder helpers
# ------------------------------------------------------------------
def __str__(self) -> str: # noqa: D401 keep original name
return self.value or "<invalid Semester>"
def __repr__(self) -> str: # Helpful for debugging lists
return f"Semester({self._year!r}, {self._semester!r})"
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
def _generate_semester_from_month(self) -> None:
"""Infer *WiSe* / *SoSe* from the month attribute."""
self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe"
def _compute_value(self) -> None:
"""Humanreadable semester label e.g. ``WiSe 23/24`` or ``SoSe 24``."""
year = self._year
if self._semester == "WiSe":
next_year = (year + 1) % 100 # wrap 99 → 0
self.value = f"WiSe {year}/{next_year}"
else: # SoSe
self.value = f"SoSe {year}"
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def offset(self, value: int) -> "Semester":
"""Return a new :class:`Semester` *value* steps away.
The algorithm maps every semester to a monotonically increasing
*linear index* so that simple addition suffices:
``index = year * 2 + (0 if SoSe else 1)``.
"""
if not isinstance(value, int):
raise TypeError("value must be an int (number of semesters to jump)")
if value == 0:
return Semester(self._year, self._semester)
current_idx = self._year * 2 + (0 if self._semester == "SoSe" else 1)
target_idx = current_idx + value
if target_idx < 0:
raise ValueError("offset would result in a negative year not supported")
new_year, semester_bit = divmod(target_idx, 2)
new_semester = "SoSe" if semester_bit == 0 else "WiSe"
return Semester(new_year, new_semester)
# ------------------------------------------------------------------
# Comparison helpers
# ------------------------------------------------------------------
def isPastSemester(self, current: "Semester") -> bool:
log.debug(f"Comparing {self} < {current}")
if self.year < current.year:
return True
if self.year == current.year:
return (
self.semester == "WiSe" and current.semester == "SoSe"
) # WiSe before next SoSe
return False
def isFutureSemester(self, current: "Semester") -> bool:
if self.year > current.year:
return True
if self.year == current.year:
return (
self.semester == "SoSe" and current.semester == "WiSe"
) # SoSe after WiSe of same year
return False
def isMatch(self, other: "Semester") -> bool:
return self.year == other.year and self.semester == other.semester
# ------------------------------------------------------------------
# Convenience properties
# ------------------------------------------------------------------
@property
def next(self) -> "Semester":
return self.offset(1)
@property
def previous(self) -> "Semester":
return self.offset(-1)
@property
def year(self) -> int:
return self._year
@property
def semester(self) -> str:
return self._semester
# ------------------------------------------------------------------
# Static helpers
# ------------------------------------------------------------------
@staticmethod
def generate_missing(start: "Semester", end: "Semester") -> list[str]:
"""Return all consecutive semesters from *start* to *end* (inclusive)."""
if not isinstance(start, Semester) or not isinstance(end, Semester):
raise TypeError("start and end must be Semester instances")
if start.isFutureSemester(end) and not start.isMatch(end):
raise ValueError("'start' must not be after 'end'")
chain: list[Semester] = [start.value]
current = start
while not current.isMatch(end):
current = current.next
chain.append(current.value)
if len(chain) > 1000: # sanity guard
raise RuntimeError("generate_missing exceeded sane iteration limit")
return chain
# ------------------------------------------------------------------
# Parsing helper
# ------------------------------------------------------------------
@classmethod
def from_string(cls, s: str) -> "Semester":
"""Parse a humanreadable semester label and return a :class:`Semester`.
Accepted formats (caseinsensitive)::
"SoSe <YY>" → SoSe of year YY
"WiSe <YY>/<YY+1>" → Winter term starting in YY
"WiSe <YY>" → Shorthand for the above (next year implied)
``YY`` may contain a leading zero ("06" → 6).
"""
if not isinstance(s, str):
raise TypeError("s must be a string")
pattern = r"\s*(WiSe|SoSe)\s+(\d{1,2})(?:\s*/\s*(\d{1,2}))?\s*"
m = re.fullmatch(pattern, s, flags=re.IGNORECASE)
if not m:
raise ValueError(
"invalid semester string format expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)"
)
term_raw, y1_str, y2_str = m.groups()
term = term_raw.capitalize() # normalize case → "WiSe" or "SoSe"
year = int(y1_str.lstrip("0") or "0") # "06" → 6, "0" stays 0
if term == "SoSe":
if y2_str is not None:
raise ValueError(
"SoSe string should not contain '/' followed by a second year"
)
return cls(year, "SoSe")
# term == "WiSe"
if y2_str is not None:
next_year = int(y2_str.lstrip("0") or "0")
expected_next = (year + 1) % 100
if next_year != expected_next:
raise ValueError("WiSe second year must equal first year + 1 (mod 100)")
# Accept both explicit "WiSe 6/7" and shorthand "WiSe 6"
return cls(year, "WiSe")
# ------------------------- quick selftest -------------------------
if __name__ == "__main__":
    # Chain generation demo ------------------------------------------------
    s_start = Semester(6, "SoSe")  # SoSe 6
    s_end = Semester(25, "WiSe")  # WiSe 25/26
    chain = Semester.generate_missing(s_start, s_end)
    # print("generate_missing:", [str(s) for s in chain])
    # Parsing demo ---------------------------------------------------------
    examples = [
        "SoSe 6",
        "WiSe 6/7",
        "WiSe 6",
        "SoSe 23",
        "WiSe 23/24",
        "WiSe 24",
        "WiSe 99/00",
        "SoSe 00",
        "WiSe 100/101",  # test large year
    ]
    for ex in examples:
        # "WiSe 100/101" is deliberately invalid (years are two digits at
        # most), so report parse errors instead of letting the whole demo
        # die on the last example.
        try:
            parsed = Semester.from_string(ex)
        except ValueError as err:
            print(f"'{ex}' -> parse error: {err}")
            continue
        print(f"'{ex}' -> {parsed} ({parsed.year=}, {parsed.semester=})")

View File

@@ -1,24 +0,0 @@
from dataclasses import dataclass, field
import yaml
@dataclass
class Settings:
    """Runtime settings for the app, persisted to ``config.yaml``."""

    save_path: str
    database_name: str
    database_path: str
    default_apps: bool = True
    custom_applications: list[dict] = field(default_factory=list)

    def save_settings(self) -> None:
        """Serialize all settings fields to ``config.yaml``.

        The file is written with an explicit UTF-8 encoding so the dump does
        not depend on the platform's default locale encoding.
        """
        with open("config.yaml", "w", encoding="utf-8") as f:
            yaml.dump(self.__dict__, f)
# Module-level side effect: load config.yaml on import so ``data`` holds the
# parsed settings. NOTE(review): this raises FileNotFoundError when the file
# is missing -- confirm every import site can tolerate that.
with open("config.yaml", "r") as f:
    data = yaml.safe_load(f)

View File

@@ -1,314 +0,0 @@
from enum import Enum
from typing import Any, Optional, Union
import requests
from bs4 import BeautifulSoup
# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry
from src.logic.dataclass import BookData
from src.shared.logging import log
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
#
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
ISBN = "RDS_ISBN"
AUTHOR = "RDS_PERSON"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
(HTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
"Accept-Language": "en-US, en;q=0.5",
}
RATE_LIMIT = 20
RATE_PERIOD = 30
class TransformerType(Enum):
    """Supported output formats for :class:`BibTextTransformer`."""

    ARRAY = "ARRAY"
    COinS = "COinS"
    BibTeX = "BibTeX"
    RIS = "RIS"
    RDS = "RDS"
class WebRequest:
    """Fetch catalogue pages from the RDS OPAC and collect their ``<pre>`` export blocks.

    Typical chained usage: ``WebRequest().set_apparat(7).get_ppn(sig).get_data()``.
    """

    def __init__(self) -> None:
        """Request data from the web, and format it depending on the mode."""
        self.apparat = None  # zero-padded apparat number, set via set_apparat()
        self.use_any = False  # use any book that matches the search term
        self.signature = None  # raw signature as passed to get_ppn()
        self.ppn = None  # normalized search term for the catalogue query
        self.data = None
        self.timeout = 5  # seconds, used for every outgoing request
        log.info("Initialized WebRequest")

    @property
    def use_any_book(self):
        """use any book that matches the search term"""
        # NOTE(review): accessing this property mutates state (sets
        # ``use_any``) and returns ``self`` for chaining -- surprising for a
        # property; confirm callers expect the side effect.
        self.use_any = True
        log.info("Using any book")
        return self

    def set_apparat(self, apparat: int) -> "WebRequest":
        # Zero-pad one-digit numbers ("7" -> "07") so the later
        # "Semesterapparat-NN" comparison in get_data() matches.
        self.apparat = apparat
        if int(self.apparat) < 10:
            self.apparat = f"0{self.apparat}"
        log.info(f"Set apparat to {self.apparat}")
        return self

    def get_ppn(self, signature: str) -> "WebRequest":
        # Keep the raw signature for error messages, then normalize it:
        # URL-encode '+' and reduce DOI URLs to their trailing segment.
        self.signature = signature
        if "+" in signature:
            signature = signature.replace("+", "%2B")
        if "doi.org" in signature:
            signature = signature.split("/")[-1]
        self.ppn = signature
        return self

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_book(self, searchterm: str) -> str:
        # Rate-limited signature search against the OPAC; returns raw HTML.
        response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
        return response.text

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_ppn(self, ppn: str) -> str:
        # Rate-limited record lookup by PPN; returns raw HTML.
        response = requests.get(API_URL.format(ppn), timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list[str]:
        # Collect absolute URLs of every full-record link on the result page.
        response: str = self.search_book(searchterm)  # type:ignore
        soup = BeautifulSoup(response, "html.parser")
        links = soup.find_all("a", class_="title getFull")
        res: list[str] = []
        for link in links:
            res.append(BASE + link["href"])
        return res

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search(self, link: str) -> Optional[str]:
        # Fetch one record page; returns None on any request failure.
        try:
            response = requests.get(link, timeout=self.timeout)
            return response.text
        except requests.exceptions.RequestException as e:
            log.error(f"Request failed: {e}")
            return None

    def get_data(self) -> Optional[list[str]]:
        """Return the ``<pre>`` export blocks of the first record whose
        holding location matches: the "1. OG Semesterapparat" shelf, the
        configured apparat number, or (with ``use_any`` set) any location."""
        links = self.get_book_links(self.ppn)
        log.debug(f"Links: {links}")
        return_data: list[str] = []
        for link in links:
            result: str = self.search(link)  # type:ignore
            # in result search for class col-xs-12 rds-dl RDS_LOCATION
            # if found, return text of href
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if locations:
                for location in locations:
                    if "1. OG Semesterapparat" in location.text:
                        log.success("Found Semesterapparat, adding entry")
                        pre_tag = soup.find_all("pre")
                        return_data = []
                        if pre_tag:
                            for tag in pre_tag:
                                data = tag.text.strip()
                                return_data.append(data)
                            return return_data
                        else:
                            log.error("No <pre> tag found")
                            return return_data
                    else:
                        item_location = location.find(
                            "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                        ).text.strip()
                        log.debug(f"Item location: {item_location}")
                        if self.use_any:
                            pre_tag = soup.find_all("pre")
                            if pre_tag:
                                for tag in pre_tag:
                                    data = tag.text.strip()
                                    return_data.append(data)
                                return return_data
                            else:
                                log.error("No <pre> tag found")
                                raise ValueError("No <pre> tag found")
                        elif f"Semesterapparat-{self.apparat}" in item_location:
                            pre_tag = soup.find_all("pre")
                            return_data = []
                            if pre_tag:
                                for tag in pre_tag:
                                    data = tag.text.strip()
                                    return_data.append(data)
                                return return_data
                            else:
                                log.error("No <pre> tag found")
                                return return_data
                        else:
                            log.error(
                                f"Signature {self.signature} not found in {item_location}"
                            )
                            # return_data = []
        return return_data

    def get_data_elsa(self) -> Optional[list[str]]:
        """Like :meth:`get_data`, but accept any holding location.

        NOTE(review): returns after the first page that lists locations; if
        no link yields locations the function implicitly returns None.
        """
        links = self.get_book_links(self.ppn)
        for link in links:
            result = self.search(link)
            # in result search for class col-xs-12 rds-dl RDS_LOCATION
            # if found, return text of href
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if locations:
                for _ in locations:
                    pre_tag = soup.find_all("pre")
                    return_data = []
                    if pre_tag:
                        for tag in pre_tag:
                            data = tag.text.strip()
                            return_data.append(data)
                        return return_data
                    else:
                        log.error("No <pre> tag found")
                        return return_data
class BibTextTransformer:
"""Transforms data from the web into a BibText format.
Valid Modes are ARRAY, COinS, BibTeX, RIS, RDS
Raises:
ValueError: Raised if mode is not in valid_modes
"""
valid_modes = [
TransformerType.ARRAY,
TransformerType.COinS,
TransformerType.BibTeX,
TransformerType.RIS,
TransformerType.RDS,
]
def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
self.mode = mode.value
self.field = None
self.signature = None
if mode not in self.valid_modes:
log.error(f"Mode {mode} not valid")
raise ValueError(f"Mode {mode} not valid")
self.data = None
# self.bookdata = BookData(**self.data)
def use_signature(self, signature: str) -> "BibTextTransformer":
"""use the exact signature to search for the book"""
self.signature = signature
return self
def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer":
RIS_IDENT = "TY -"
ARRAY_IDENT = "[kid]"
COinS_IDENT = "ctx_ver"
BIBTEX_IDENT = "@book"
RDS_IDENT = "RDS ---------------------------------- "
if data is None:
self.data = None
return self
if self.mode == "RIS":
for line in data:
if RIS_IDENT in line:
self.data = line
elif self.mode == "ARRAY":
for line in data:
if ARRAY_IDENT in line:
self.data = line
elif self.mode == "COinS":
for line in data:
if COinS_IDENT in line:
self.data = line
elif self.mode == "BibTeX":
for line in data:
if BIBTEX_IDENT in line:
self.data = line
elif self.mode == "RDS":
for line in data:
if RDS_IDENT in line:
self.data = line
return self
def return_data(
self, option: Any = None
) -> Union[
Optional[BookData],
Optional[RDS_GENERIC_DATA],
Optional[RDS_AVAIL_DATA],
None,
dict[str, Union[RDS_AVAIL_DATA, RDS_GENERIC_DATA]],
]:
"""Return Data to caller.
Args:
option (string, optional): Option for RDS as there are two filetypes. Use rds_availability or rds_data. Anything else gives a dict of both responses. Defaults to None.
Returns:
BookData: a dataclass containing data about the book
"""
if self.data is None:
return None
match self.mode:
case "ARRAY":
return ARRAYData(self.signature).transform(self.data)
case "COinS":
return COinSData().transform(self.data)
case "BibTeX":
return BibTeXData().transform(self.data)
case "RIS":
return RISData().transform(self.data)
case "RDS":
return RDSData().transform(self.data).return_data(option)
case _:
return None
# if self.mode == "ARRAY":
# return ARRAYData().transform(self.data)
# elif self.mode == "COinS":
# return COinSData().transform(self.data)
# elif self.mode == "BibTeX":
# return BibTeXData().transform(self.data)
# elif self.mode == "RIS":
# return RISData().transform(self.data)
# elif self.mode == "RDS":
# return RDSData().transform(self.data).return_data(option)
def cover(isbn, timeout: float = 10.0):
    """Fetch the medium-size cover image for *isbn* from buchhandel.de.

    Args:
        isbn: ISBN used to build the cover URL.
        timeout: Request timeout in seconds (new parameter, defaults to 10
            so a dead server cannot hang the caller forever).

    Returns:
        The raw image bytes.
    """
    cover_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    # ``stream=True`` was pointless here: ``.content`` downloads the whole
    # body anyway, so fetch directly and always pass a timeout.
    response = requests.get(cover_url, timeout=timeout)
    return response.content
def get_content(soup, css_class):
    """Return the stripped text of the first ``div`` with *css_class*."""
    node = soup.find("div", class_=css_class)
    return node.text.strip()
if __name__ == "__main__":
    # Manual smoke test. The old demo was broken three ways: WebRequest()
    # takes no constructor argument, BibTextTransformer expects a
    # TransformerType (not the string "ARRAY"), and the fetched data was
    # never handed to the transformer.
    signature = "CU 8500 K64"
    data = WebRequest().set_apparat(71).get_ppn(signature).get_data()
    bib = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
    log.debug(bib)

View File

@@ -1,373 +0,0 @@
import zipfile
from typing import Any, Optional
import fitz # PyMuPDF
import pandas as pd
from bs4 import BeautifulSoup
from docx import Document
from src.logic.dataclass import Book, SemapDocument
from src.shared.logging import log
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
    """Read every table of the Word document at *path* into a DataFrame.

    The first row of each table becomes the column header; embedded
    newlines inside cells are removed.
    """
    frames: list[pd.DataFrame] = []
    for table in Document(path).tables:
        rows = [
            [cell.text.replace("\n", "") for cell in row.cells]
            for row in table.rows
        ]
        frame = pd.DataFrame(rows)
        frame.columns = frame.iloc[0]  # promote first row to header
        frames.append(frame.iloc[1:])
    return frames
def get_fach(path: str) -> Optional[str]:
    """Extract the subject ("Fach") value from the docx XML at *path*.

    Looks for the paragraph whose ``w14:paraId`` equals the fixed id
    ``12456A32`` and returns the first text run found inside it, or
    ``None`` when nothing matches.
    """
    with zipfile.ZipFile(path) as archive:
        xml_data = archive.read("word/document.xml")
    soup = BeautifulSoup(xml_data, "xml")
    # the value we need lives in <w:p w14:paraId="12456A32"> -> w:r -> w:t
    for para in soup.find_all("w:p"):
        if para.get("w14:paraId") != "12456A32":
            continue
        for run in para.find_all("w:r"):
            text_node = run.find("w:t")
            if text_node and text_node.contents:
                return text_node.contents[0]
    return None
def makeDict() -> dict[str, Optional[str]]:
    """Return a fresh record template with every known field set to None."""
    field_names = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "issue",
        "pages",
        "isbn",
        "type",
    )
    return dict.fromkeys(field_names)
def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
    """Convert raw table rows into record dicts for the given document *type*.

    Each source row is a tuple whose positional layout depends on *type*
    ("Monografien", "Herausgeberwerke" or "Zeitschriftenaufsätze"); rows of
    an unknown type yield records with every field left at None.
    """
    # Positional layout per document type: field name -> tuple index.
    layouts = {
        "Monografien": {
            "work_author": 0,
            "year": 1,
            "edition": 2,
            "work_title": 3,
            "location": 4,
            "publisher": 5,
            "signature": 6,
            "pages": 7,
        },
        "Herausgeberwerke": {
            "section_author": 0,
            "year": 1,
            "edition": 2,
            "chapter_title": 3,
            "work_author": 4,
            "work_title": 5,
            "location": 6,
            "publisher": 7,
            "pages": 8,
            "signature": 9,
        },
        "Zeitschriftenaufsätze": {
            "section_author": 0,
            "year": 1,
            "issue": 2,
            "chapter_title": 3,
            "work_title": 4,
            "location": 5,
            "publisher": 6,
            "pages": 7,
            "signature": 8,
        },
    }
    layout = layouts.get(type)
    records: list[dict[str, Optional[str]]] = []
    for row in tlist:
        record = makeDict()
        if layout is not None:
            record["type"] = type
            for field_name, index in layout.items():
                record[field_name] = row[index]
        records.append(record)
    return records
def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
    """Parse an ELSA Word form into record dicts.

    The document type (taken from the last non-empty paragraph) selects the
    column layout; rows that are entirely-empty placeholders are dropped.

    Returns:
        A tuple of (records, document type).
    """
    doc = Document(path)
    # # print all lines in doc
    doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]
    # all-empty placeholder row per document type, used to filter blank rows
    tuples = {
        "Monografien": ("", "", "", "", "", "", "", "", ""),
        "Herausgeberwerke": ("", "", "", "", "", "", "", "", "", "", ""),
        "Zeitschriftenaufsätze": ("", "", "", "", "", "", "", "", "", ""),
    }
    tables = doc.tables
    m_data: list[pd.DataFrame] = []
    for table in tables:
        data: list[list[str]] = []
        for row in table.rows:
            row_data: list[str] = []
            for cell in row.cells:
                text = cell.text
                text = text.replace("\n", "")
                text = text.replace("\u2002", "")  # strip en-space padding
                row_data.append(text)
            data.append(row_data)
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]  # first table row is the header
        df = df.iloc[1:]
        m_data.append(df)
    df = m_data[0]
    # split df to rows
    data = [
        row for row in df.itertuples(index=False, name=None) if row != tuples[doctype]
    ]
    # log.debug(data)
    return tuple_to_dict(data, doctype), doctype
def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
    """Build a SemapDocument from the three tables of a Semesterapparat Word form.

    Table 0 holds the contact fields, table 1 the course fields and table 2
    the book list.

    Args:
        word_path: path to the .docx form.
        ai: when True, trigger the semester/name normalization properties.
    """
    log.info("Parsing Word Document {}", word_path)
    semap = SemapDocument()
    df = word_docx_to_csv(word_path)
    apparatdata = df[0]
    apparatdata = apparatdata.to_dict()
    keys = list(apparatdata.keys())
    # print(apparatdata, keys)
    # The form table alternates label / value columns, so pairing consecutive
    # keys (label_i -> label_{i+1}) yields a label->value mapping.
    appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}
    semap.phoneNumber = appdata["Telefon:"]
    semap.subject = appdata["Ihr Fach:"]
    semap.mail = appdata["Mailadresse:"]
    # everything before the last comma is the name, the rest the title
    semap.personName = ",".join(appdata["Ihr Name und Titel:"].split(",")[:-1])
    semap.personTitle = ",".join(appdata["Ihr Name und Titel:"].split(",")[-1:]).strip()
    apparatdata = df[1]
    apparatdata = apparatdata.to_dict()
    keys = list(apparatdata.keys())
    appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
    semap.title = appdata["Veranstaltung:"]
    semap.semester = appdata["Semester:"]
    if ai:
        # NOTE(review): bare property accesses -- presumably these properties
        # normalize semester/name as a side effect; confirm on SemapDocument.
        semap.renameSemester
        semap.nameSetter
    books = df[2]
    booklist = []
    for i in range(len(books)):
        if books.iloc[i].isnull().all():
            continue
        data = books.iloc[i].to_dict()
        book = Book()
        book.from_dict(data)
        if book.is_empty:
            continue
        elif not book.has_signature:
            continue
        else:
            booklist.append(book)
    log.info("Found {} books", len(booklist))
    semap.books = booklist
    return semap
def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
    """
    Parse a Semesterapparat PDF like the sample you provided and return a SemapDocument.
    - No external programs, only PyMuPDF.
    - Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells.
    - Works across multiple pages; headers only need to exist on the first page.
    """
    doc = fitz.open(pdf_path)
    semap = SemapDocument()

    # ---------- helpers ----------
    def _join_tokens(tokens: list[str]) -> str:
        """Join tokens, preserving hyphen/URL joins across line wraps."""
        parts = []
        for tok in tokens:
            if parts and (
                parts[-1].endswith("-")
                or parts[-1].endswith("/")
                or parts[-1].endswith(":")
            ):
                parts[-1] = parts[-1] + tok  # no space after '-', '/' or ':'
            else:
                parts.append(tok)
        return " ".join(parts).strip()

    def _extract_row_values_multiline(
        page, labels: list[str], y_window: float = 24
    ) -> dict[str, str]:
        """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
        rects = []
        for lab in labels:
            hits = page.search_for(lab)
            if hits:
                rects.append((lab, hits[0]))
        if not rects:
            return {}
        rects.sort(key=lambda t: t[1].x0)
        words = page.get_text("words")
        out = {}
        for i, (lab, r) in enumerate(rects):
            # value area: right of this label, left of the next label (or
            # the page edge), within a small vertical window below the label
            x0 = r.x1 + 1
            x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
            y0 = r.y0 - 3
            y1 = r.y0 + y_window
            toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
            toks.sort(key=lambda w: (w[1], w[0]))  # line, then x
            out[lab] = _join_tokens([w[4] for w in toks])
        return out

    def _compute_columns_from_headers(page0):
        """Find column headers (once) and derive column centers + header baseline."""
        headers = [
            ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
            ("Jahr/Auflage", "Jahr/Auflage"),
            ("Titel", "Titel"),
            ("Ort und Verlag", "Ort und Verlag"),
            ("Standnummer", "Standnummer"),
            ("Interne Vermerke", "Interne Vermerke"),
        ]
        found = []
        for label, canon in headers:
            rects = [
                r for r in page0.search_for(label) if r.y0 > 200
            ]  # skip top-of-form duplicates
            if rects:
                found.append((canon, rects[0]))
        found.sort(key=lambda t: t[1].x0)
        cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
        header_y = min(r.y0 for _, r in found) if found else 0
        return cols, header_y

    def _extract_table_rows_from_page(
        page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
    ):
        """
        Group words into logical rows (tolerant to wrapped lines), then map each word
        to the nearest column by x-center and join tokens per column.
        """
        words = [
            w
            for w in page.get_text("words")
            if w[1] > header_y + y_top_margin
            and w[3] < page.rect.height - y_bottom_margin
        ]
        # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
        rows = []
        for w in sorted(words, key=lambda w: w[1]):
            y = w[1]
            for row in rows:
                if abs(row["y_mean"] - y) <= y_tol:
                    row["ys"].append(y)
                    row["y_mean"] = sum(row["ys"]) / len(row["ys"])
                    row["words"].append(w)
                    break
            else:
                rows.append({"y_mean": y, "ys": [y], "words": [w]})
        # map to columns + join
        joined_rows = []
        for row in rows:
            rowdict = {canon: "" for canon, *_ in cols}
            words_by_col = {canon: [] for canon, *_ in cols}
            for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
                xmid = (w[0] + w[2]) / 2.0
                canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
                words_by_col[canon].append(w[4])
            for canon, toks in words_by_col.items():
                rowdict[canon] = _join_tokens(toks)
            if any(v for v in rowdict.values()):
                joined_rows.append(rowdict)
        return joined_rows

    # ---------- top-of-form fields ----------
    p0 = doc[0]
    row1 = _extract_row_values_multiline(
        p0,
        ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
        y_window=22,
    )
    row2 = _extract_row_values_multiline(
        p0, ["Veranstaltung:", "Semester:"], y_window=20
    )
    name_title = row1.get("Ihr Name und Titel:", "") or ""
    semap.subject = row1.get("Ihr Fach:", None)
    semap.phoneNumber = row1.get("Telefon:", None)  # keep as-is (string like "682-308")
    semap.mail = row1.get("Mailadresse:", None)
    # "Last, First, Title": everything before the last comma is the name
    semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
    semap.personTitle = (
        ",".join(name_title.split(",")[-1:]).strip() if name_title else None
    )
    semap.title = row2.get("Veranstaltung:", None)
    semap.semester = row2.get("Semester:", None)

    # ---------- table extraction (all pages) ----------
    cols, header_y = _compute_columns_from_headers(p0)
    all_rows: list[dict[str, Any]] = []
    for pn in range(len(doc)):
        all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y))
    # drop the sub-header line "Nachname, Vorname" etc.
    filtered = []
    for r in all_rows:
        if r.get("Autorenname(n):Nachname, Vorname", "").strip() in (
            "",
            "Nachname, Vorname",
        ):
            # skip if it's just the sub-header line
            if all(not r[c] for c in r if c != "Autorenname(n):Nachname, Vorname"):
                continue
        filtered.append(r)
    # build Book objects (same filters as your word parser)
    booklist: list[Book] = []
    for row in filtered:
        b = Book()
        b.from_dict(row)
        if b.is_empty:
            continue
        if not b.has_signature:
            continue
        booklist.append(b)
    semap.books = booklist
    # keep parity with your post-processing
    if ai:
        _ = semap.renameSemester
        _ = semap.nameSetter
    return semap
if __name__ == "__main__":
    # Manual smoke test against a local sample document.
    parsed_semap = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf")

View File

@@ -1,67 +0,0 @@
import xml.etree.ElementTree as ET
from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.logic.semester import Semester
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """
    Parse an XML string representing a mail submission and return an XMLMailSubmission object.

    Expected layout: a ``<static>`` element with the submitter fields and a
    ``<books>`` element whose children each describe one book. A book's
    ``<year>`` may carry an edition as ``"YYYY/EDITION"``; the two parts are
    split here.
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)
    static_data = root.find("static")
    static_info = {child.tag: child.text for child in static_data}
    books_info = []
    for book_elem in root.find("books"):  # renamed: `book` shadowed the loop var
        details = {detail.tag: detail.text for detail in book_elem}
        # "2020/3" means year 2020, edition 3. A missing <year> tag used to
        # crash with a TypeError on the `in` check, so default to "".
        year_raw = details.get("year") or ""
        year, _, edition = year_raw.partition("/")
        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year if year_raw else None,
                edition=edition or None,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )
    # Extract static data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    telno = static_info.get("telno")
    # int(None) used to raise TypeError when the tag was absent
    submission.telno = int(telno) if telno is not None else None
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")
    # "WiSe 23" -> term "WiSe", year 23; malformed/absent values leave None
    semester_parts = (static_info.get("semester") or "").split()
    if len(semester_parts) >= 2:
        submission.semester = Semester(
            semester=semester_parts[0], year=int(semester_parts[1])
        )
    submission.books = books_info
    return submission
def eml_parser(path: str) -> XMLMailSubmission:
    """Read an .eml file and parse its XML body into an XMLMailSubmission.

    The mail headers (everything before the first blank line) are
    discarded; the remainder is treated as the XML payload.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw = handle.read()
    # Mail headers end at the first blank line; keep only the body.
    xml_content = raw.split("\n\n", 1)[1]
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)
def eml_to_semap(xml_mail: str) -> SemapDocument:
    """Build a SemapDocument from an .eml file on disk.

    Args:
        xml_mail: Filesystem path to the .eml file.  Despite the parameter
            name, this is a path string forwarded to ``eml_parser``, not an
            already-parsed submission (the old ``XMLMailSubmission``
            annotation was wrong).

    Returns:
        SemapDocument: apparat, semester and books taken from the mail.
    """
    submission = eml_parser(xml_mail)
    semap_doc = SemapDocument(
        # prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email),
        apparat=Apparat(name=submission.app_name, subject=submission.subject),
        semester=submission.semester,
        books=submission.books,
    )
    return semap_doc

View File

@@ -1,340 +0,0 @@
from dataclasses import dataclass
from typing import Optional
from pyzotero import zotero
from src import settings
from src.logic.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
@dataclass
class Creator:
    """A single Zotero creator (author/editor) record."""

    firstName: str = None
    lastName: str = None
    creatorType: str = "author"

    def from_dict(self, data: dict) -> None:
        """Copy every key/value pair from *data* onto this instance."""
        for attr, val in data.items():
            setattr(self, attr, val)

    def from_string(self, data: str) -> "Creator":
        """Populate from a ``"Last, First"`` string; fluent (returns self).

        Strings without a comma leave the instance untouched.  Surrounding
        whitespace is preserved exactly as found in the input.
        """
        if "," in data:
            last, first = data.split(",")[0], data.split(",")[1]
            self.lastName = last
            self.firstName = first
        return self
# set __dict__ object to be used in json
@dataclass
class Book:
    """Field bag mirroring the Zotero 'book' item template.

    Every attribute defaults to ``None``; ``to_dict`` serialises only the
    attributes that were actually set, which is the shape the Zotero API
    expects for an upload payload.
    """

    itemType: str = "book"
    creators: list[Creator] = None
    tags: list = None
    collections: list = None
    relations: dict = None
    title: str = None
    abstractNote: str = None
    series: str = None
    seriesNumber: str = None
    volume: str = None
    numberOfVolumes: str = None
    edition: str = None
    place: str = None
    publisher: str = None
    date: str = None
    numPages: str = None
    language: str = None
    ISBN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None

    def to_dict(self) -> dict:
        """Return all truthy fields as a dict (Zotero upload payload)."""
        return {key: value for key, value in self.__dict__.items() if value}
@dataclass
class BookSection:
    """A Zotero 'bookSection' item: one chapter/section inside a book.

    Mirrors the Zotero item template; ``assign`` copies the shared fields
    from a parent :class:`Book` so only the section-specific fields need
    to be filled afterwards.
    """

    itemType: str = "bookSection"
    title: str = None
    creators: list[Creator] = None
    abstractNote: str = None
    bookTitle: str = None
    series: str = None
    seriesNumber: str = None
    volume: str = None
    numberOfVolumes: str = None
    edition: str = None
    place: str = None
    publisher: str = None
    date: str = None
    pages: str = None
    language: str = None
    ISBN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None
    # Fixed: these used to be plain class attributes bound to the built-in
    # *types* themselves (``tags = list``), so they were never dataclass
    # fields, never appeared in ``__dict__``/``to_dict``, and ``assign``
    # silently skipped them.  Declared as real fields for parity with Book.
    tags: list = None
    collections: list = None
    relations: dict = None

    def to_dict(self) -> dict:
        """Return all truthy fields as a dict (Zotero upload payload)."""
        ret = {}
        for key, value in self.__dict__.items():
            if value:
                ret[key] = value
        return ret

    def assign(self, book) -> None:
        """Copy every attribute *book* shares with this section onto self."""
        for key, value in book.__dict__.items():
            if key in self.__dict__:
                try:
                    setattr(self, key, value)
                except AttributeError:
                    # read-only attribute on this instance; keep the default
                    pass
@dataclass
class JournalArticle:
    """A Zotero 'journalArticle' item template as a dataclass.

    Mirrors the Zotero item template; ``assign`` copies the shared fields
    from a catalogue record object so only the article-specific fields
    need to be filled afterwards.
    """

    # Fixed: ``itemType`` had no annotation, so it was a class attribute
    # rather than a dataclass field — instances lacked it in ``__dict__``
    # and ``to_dict`` silently dropped it unless callers re-set it by hand
    # (as createJournalArticle did).  Now a real field, like Book's.
    itemType: str = "journalArticle"
    title: str = None
    creators: list[Creator] = None
    abstractNote: str = None
    publicationTitle: str = None
    volume: str = None
    issue: str = None
    pages: str = None
    date: str = None
    series: str = None
    seriesTitle: str = None
    seriesText: str = None
    journalAbbreviation: str = None
    language: str = None
    DOI: str = None
    ISSN: str = None
    shortTitle: str = None
    url: str = None
    accessDate: str = None
    archive: str = None
    archiveLocation: str = None
    libraryCatalog: str = None
    callNumber: str = None
    rights: str = None
    extra: str = None
    # Fixed: previously bound to the built-in types (``tags = list``);
    # see BookSection for the same defect.  Real fields now.
    tags: list = None
    collections: list = None
    relations: dict = None

    def to_dict(self) -> dict:
        """Return all truthy fields as a dict (Zotero upload payload)."""
        ret = {}
        for key, value in self.__dict__.items():
            if value:
                ret[key] = value
        return ret

    def assign(self, book) -> None:
        """Copy every attribute *book* shares with this article onto self.

        Note: *book* is an object with attributes (its ``__dict__`` is
        read); the old ``book: dict`` annotation was wrong.
        """
        for key, value in book.__dict__.items():
            if key in self.__dict__:
                try:
                    setattr(self, key, value)
                except AttributeError:
                    # read-only attribute on this instance; keep the default
                    pass
class ZoteroController:
    """Thin wrapper around pyzotero plus catalogue lookups via WebRequest."""

    # Read once at class-definition time from the application settings.
    zoterocfg = settings.zotero

    def __init__(self):
        # Without a configured library id there is nothing to connect to;
        # ``self.zot`` stays unset in that case (later calls will raise
        # AttributeError — callers are expected to have configured Zotero).
        if self.zoterocfg.library_id is None:
            return
        self.zot = zotero.Zotero(  # type: ignore
            self.zoterocfg.library_id,
            self.zoterocfg.library_type,
            self.zoterocfg.api_key,
        )

    def get_books(self) -> list:
        """Return all top-level Zotero items whose itemType is 'book'."""
        ret = []
        items = self.zot.top()  # type: ignore
        for item in items:
            if item["data"]["itemType"] == "book":
                ret.append(item)
        return ret

    # create item in zotero
    # item is a part of a book
    def __get_data(self, isbn) -> dict:
        # Resolve an ISBN/signature to book data via the library catalogue
        # (network I/O) and run it through the BibTeX transformer.
        web = WebRequest()
        web.get_ppn(isbn)
        data = web.get_data_elsa()
        bib = BibTextTransformer()
        bib.get_data(data)
        book = bib.return_data()
        return book

    # # #print(zot.item_template("bookSection"))
    def createBook(self, isbn) -> Book:
        """Build a Book payload from catalogue data for *isbn* (not uploaded)."""
        book = self.__get_data(isbn)
        bookdata = Book()
        # NOTE(review): title is cut at the first ':' — presumably to drop
        # the subtitle; confirm for titles that legitimately contain colons.
        bookdata.title = book.title.split(":")[0]
        bookdata.ISBN = book.isbn
        bookdata.language = book.language
        bookdata.date = book.year
        bookdata.publisher = book.publisher
        bookdata.url = book.link
        bookdata.edition = book.edition
        bookdata.place = book.place
        bookdata.numPages = book.pages
        # Catalogue author strings are ';'-separated "Last, First" entries.
        authors = [
            Creator().from_string(author).__dict__ for author in book.author.split(";")
        ]
        # Entries without a comma never set lastName — drop them.
        authors = [author for author in authors if author["lastName"] is not None]
        bookdata.creators = authors
        return bookdata

    def createItem(self, item) -> Optional[str]:
        """Upload *item* to Zotero; return the new item key or None on failure."""
        resp = self.zot.create_items([item])  # type: ignore
        if "successful" in resp.keys():
            log.debug(resp)
            # Single-item upload: the first (and only) result holds the key.
            return resp["successful"]["0"]["key"]
        else:
            return None

    def deleteItem(self, key) -> None:
        """Delete the Zotero item whose key matches *key* (first match only)."""
        items = self.zot.items()
        for item in items:
            if item["key"] == key:
                self.zot.delete_item(item)  # type: ignore
                # #print(item)
                break

    def createHGSection(self, book: Book, data: dict) -> Optional[str]:
        """Create a book section whose book-level creators become editors.

        *data* must provide 'pages', 'chapter_title' and 'section_author'
        (';'-separated "Last, First" names).  Returns the new item key.
        """
        log.debug(book)
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = data["chapter_title"]
        # The inherited book creators are demoted to editors of the volume.
        creators = chapter.creators
        for creator in creators:
            creator["creatorType"] = "editor"
        chapter.creators = creators
        authors = [
            Creator().from_string(author).__dict__
            for author in data["section_author"].split(";")
        ]
        chapter.creators += authors
        log.debug(chapter.to_dict())
        return self.createItem(chapter.to_dict())
        pass

    def createBookSection(self, book: Book, data: dict) -> Optional[str]:
        """Create an untitled book section from *book* (pages from *data*)."""
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = ""
        return self.createItem(chapter.to_dict())
        # chapter.creators

    def createJournalArticle(self, journal, article) -> Optional[str]:
        """Create a journal article item from *journal* metadata + *article* dict."""
        # #print(type(article))
        journalarticle = JournalArticle()
        journalarticle.assign(journal)
        journalarticle.itemType = "journalArticle"
        journalarticle.creators = [
            Creator().from_string(author).__dict__
            for author in article["section_author"].split(";")
        ]
        journalarticle.date = article["year"]
        journalarticle.title = article["chapter_title"]
        journalarticle.publicationTitle = article["work_title"].split(":")[0].strip()
        journalarticle.pages = article["pages"]
        journalarticle.ISSN = article["isbn"]
        journalarticle.issue = article["issue"]
        # NOTE(review): url is assigned the ISBN value — looks like a
        # copy/paste slip; confirm whether a link was intended here.
        journalarticle.url = article["isbn"]
        # #print(journalarticle.to_dict())
        return self.createItem(journalarticle.to_dict())

    def get_citation(self, item) -> str:
        """Return a plain-text DGPs-style citation for Zotero item key *item*.

        Fetches the formatted bibliography entry and strips the HTML
        wrapper tags Zotero returns.
        """
        title = self.zot.item(  # type: ignore
            item,
            content="bib",
            style="deutsche-gesellschaft-fur-psychologie",
        )[0]
        # title = title[0]
        title = (
            title.replace("<i>", "")
            .replace("</i>", "")
            .replace('<div class="csl-entry">', "")
            .replace("</div>", "")
            .replace("&amp;", "&")
        )
        return title
if __name__ == "__main__":
zot = ZoteroController()
book = zot.createBook("DV 3000 D649 (4)")
row = "Döbert, Hans & Hörner, Wolfgang & Kopp, Bortho von & Reuter, Lutz R."
zot.createBookSection()
# book = Book()
# # # book.
# ISBN = "9783801718718"
# book = createBook(isbn=ISBN)
# chapter = BookSection()
# chapter.title = "Geistige Behinderung"
# chapter.bookTitle = book.title
# chapter.pages = "511 - 538"
# chapter.publisher = book.publisher
# authors = [
# Creator("Jennifer M.", "Phillips").__dict__,
# Creator("Hower", "Kwon").__dict__,
# Creator("Carl", "Feinstein").__dict__,
# Creator("Inco", "Spintczok von Brisinski").__dict__,
# ]
# publishers = book.author
# if isinstance(publishers, str):
# publishers = [publishers]
# for publisher in publishers:
# # #print(publisher)
# creator = Creator().from_string(publisher)
# creator.creatorType = "editor"
# authors.append(creator.__dict__)
# chapter.creators = authors
# chapter.publisher = book.publisher
# # #print(chapter.to_dict())
# createBookSection(chapter.to_dict())
# get_citation("9ZXH8DDE")
# # # #print()
# # #print(get_books())
# # #print(zot.item_creator_types("bookSection"))

View File

@@ -1,13 +1,15 @@
__all__ = [
"csv_to_list",
"pdf_to_csv",
"word_to_semap",
"elsa_word_to_csv",
"eml_parser",
"eml_to_semap",
"pdf_to_csv",
"pdf_to_semap",
"word_to_semap",
]
from .csv_parser import csv_to_list
from .pdf_parser import pdf_to_csv
from .word_parser import word_to_semap
from .word_parser import elsa_word_to_csv, pdf_to_semap, word_to_semap
from .xml_parser import eml_parser, eml_to_semap

View File

@@ -1,23 +1,28 @@
import csv
from pathlib import Path
from charset_normalizer import detect
from src.core.models import Book, SemapDocument
def csv_to_list(path: str) -> list[str]:
"""
Extracts the data from a csv file and returns it as a pandas dataframe
"""
def csv_to_list(path: str) -> SemapDocument:
"""Extract the data from a csv file and return it as a minimal SemapDocument."""
encoding = detect(open(path, "rb").read())["encoding"]
with open(path, newline="", encoding=encoding) as csvfile:
with Path(path).open(newline="", encoding=encoding) as csvfile:
# if decoder fails to map, assign ""
reader = csv.reader(csvfile, delimiter=";", quotechar="|")
ret = []
for row in reader:
ret.append(row[0].replace('"', ""))
return ret
books = [Book(signature=row) for row in ret]
return SemapDocument(books=books)
if __name__ == "__main__":
text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
text = csv_to_list("C:/Users/aky547/Desktop/semap/sap71.csv")
# remove linebreaks
# #print(text)
# debug: print result when running as script
# print(text)

View File

@@ -6,3 +6,11 @@ from .transformers import (
RDSData,
RISData,
)
# Explicit re-exports to avoid F401 warnings
RDS_AVAIL_DATA = RDS_AVAIL_DATA
ARRAYData = ARRAYData
BibTeXData = BibTeXData
COinSData = COinSData
RDSData = RDSData
RISData = RISData

View File

@@ -2,20 +2,14 @@ from __future__ import annotations
import json
import re
import sys
from dataclasses import dataclass
from dataclasses import field as dataclass_field
from typing import Any, List
import loguru
from src.shared.logging import log, get_bloat_logger, preview
from src.core.models import BookData
from src import LOG_DIR
from src.logic.dataclass import BookData
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# use centralized logging configured in application startup
###Pydatnic models
@@ -124,7 +118,6 @@ class BaseStruct:
class ARRAYData:
def __init__(self, signature=None) -> None:
self.signature = None
pass
def transform(self, data: str) -> BookData:
def _get_line(source: str, search: str) -> str:
@@ -148,7 +141,7 @@ class ARRAYData:
source = source.replace("\t", "").replace("\r", "")
source = source.split(search)[1].split(")")[0]
return _get_line(source, entry).replace("=>", "").strip()
except:
except Exception:
return ""
def _get_isbn(source: str) -> list:
@@ -164,7 +157,7 @@ class ARRAYData:
continue
ret.append(isb) if isb not in ret else None
return ret
except:
except Exception:
isbn = []
return isbn
@@ -223,7 +216,9 @@ class ARRAYData:
def _get_adis_idn(data, signature):
loksatz_match = re.search(
r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL
r"\[loksatz\] => Array\s*\((.*?)\)",
data,
re.DOTALL,
)
if loksatz_match:
loksatz_content = loksatz_match.group(1)
@@ -238,7 +233,9 @@ class ARRAYData:
def _get_in_apparat(data):
loksatz_match = re.search(
r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL
r"\[loksatz\] => Array\s*\((.*?)\)",
data,
re.DOTALL,
)
if loksatz_match:
loksatz_content = loksatz_match.group(1)
@@ -250,8 +247,7 @@ class ARRAYData:
data = eval(obj)
if data["ausleihcode"] == "R" and data["standort"] == "40":
return True
else:
return False
return False
ppn = _get_line(data, "[kid]")
title = _get_title(data).strip()
@@ -298,7 +294,7 @@ class COinSData:
try:
data = source.split(f"{search}=")[1] # .split("")[0].strip()
return data.split("rft")[0].strip() if "rft" in data else data
except:
except Exception:
return ""
return BookData(
@@ -323,7 +319,7 @@ class RISData:
try:
data = source.split(f"{search} - ")[1] # .split("")[0].strip()
return data.split("\n")[0].strip() if "\n" in data else data
except:
except Exception:
return ""
return BookData(
@@ -360,7 +356,7 @@ class BibTeXData:
.replace("[", "")
.replace("];", "")
)
except:
except Exception:
return ""
return BookData(
@@ -384,7 +380,9 @@ class RDSData:
def transform(self, data: str):
# rds_availability = RDS_AVAIL_DATA()
# rds_data = RDS_GENERIC_DATA()
print(data)
# debug: raw RDS data -> send to bloat logger
bloat = get_bloat_logger()
bloat.debug("RDS raw data (preview): {}", preview(data, 2000))
def __get_raw_data(data: str) -> list:
# create base data to be turned into pydantic classes
@@ -412,10 +410,9 @@ class RDSData:
def return_data(self, option=None):
if option == "rds_availability":
return self.retlist[0]
elif option == "rds_data":
if option == "rds_data":
return self.retlist[1]
else:
return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}
return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}
class DictToTable:
@@ -462,12 +459,11 @@ class DictToTable:
self.reset()
if mode == "book":
return self.book_assign(data)
elif mode == "hg":
if mode == "hg":
return self.hg_assign(data)
elif mode == "zs":
if mode == "zs":
return self.zs_assign(data)
else:
return None
return None
def book_assign(self, data):
self.type = "book"
@@ -514,7 +510,7 @@ class DictToTable:
if __name__ == "__main__":
with open("daiadata", "r") as f:
with open("daiadata") as f:
data = f.read()
ret = RDSData().transform(data)

View File

@@ -1,4 +1,5 @@
import xml.etree.ElementTree as ET
from src.shared.logging import log, get_bloat_logger, preview
from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.core.semester import Semester
@@ -51,8 +52,9 @@ def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
def eml_parser(path: str) -> XMLMailSubmission:
with open(path, "r", encoding="utf-8") as file:
xml_content = file.read().split("\n\n", 1)[1] # Skip headers
print("EML content loaded, parsing XML...")
print(xml_content)
log.debug("EML content loaded, parsing XML...")
bloat = get_bloat_logger()
bloat.debug("EML raw XML (preview): {}", preview(xml_content, 2000))
return parse_xml_submission(xml_content)

View File

@@ -48,7 +48,8 @@ class Catalogue:
log.info(f"Searching for term: {searchterm}")
links = self.get_book_links(searchterm)
print(links)
# debug: links
# print(links)
for elink in links:
result = self.search(elink)
# in result search for class col-xs-12 rds-dl RDS_LOCATION
@@ -166,15 +167,16 @@ class Catalogue:
# based on PPN, get title, people, edition, year, language, pages, isbn,
link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
BeautifulSoup(result, "html.parser")
def get_ppn(self, searchterm: str) -> str | None:
links = self.get_book_links(searchterm)
ppn = None
for link in links:
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
print(link)
BeautifulSoup(result, "html.parser")
# debug: link
# print(link)
ppn = link.split("/")[-1]
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
return ppn
@@ -266,7 +268,8 @@ class Catalogue:
# Find the signature for the entry whose location mentions "Semesterapparat"
for g in groups:
print(g)
# debug: group contents
# print(g)
loc = g.get("location", "").lower()
if "semesterapparat" in loc:
signature = g.get("signature")

View File

@@ -8,17 +8,19 @@ from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
from src.core.models import BookData
from src.shared.logging import log
from src.shared.logging import log, get_bloat_logger, preview
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
# bloat logger for large/raw HTTP responses
bloat = get_bloat_logger()
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
#
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
@@ -55,7 +57,7 @@ class WebRequest:
@property
def use_any_book(self):
"""use any book that matches the search term"""
"""Use any book that matches the search term"""
self.use_any = True
log.info("Using any book")
return self
@@ -68,6 +70,7 @@ class WebRequest:
return self
def get_ppn(self, signature: str) -> "WebRequest":
"""Take a book signature as input and set the PPN."""
self.signature = signature
if "+" in signature:
signature = signature.replace("+", "%2B")
@@ -109,7 +112,7 @@ class WebRequest:
def get_data(self) -> Optional[list[str]]:
links = self.get_book_links(self.ppn)
log.debug(f"Links: {links}")
bloat.debug("Links (preview): {}", preview(links, 500))
return_data: list[str] = []
for link in links:
result: str = self.search(link) # type:ignore
@@ -128,40 +131,36 @@ class WebRequest:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
log.error("No <pre> tag found")
return return_data
item_location = location.find(
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
item_location = location.find(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
elif f"Semesterapparat-{self.apparat}" in item_location:
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
return return_data
else:
log.error(
f"Signature {self.signature} not found in {item_location}"
)
# return_data = []
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
if f"Semesterapparat-{self.apparat}" in item_location:
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
log.error("No <pre> tag found")
return return_data
log.error(
f"Signature {self.signature} not found in {item_location}",
)
# return_data = []
return return_data
@@ -182,9 +181,8 @@ class WebRequest:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
return return_data
log.error("No <pre> tag found")
return return_data
class BibTextTransformer:
@@ -213,7 +211,7 @@ class BibTextTransformer:
# self.bookdata = BookData(**self.data)
def use_signature(self, signature: str) -> "BibTextTransformer":
"""use the exact signature to search for the book"""
"""Use the exact signature to search for the book"""
self.signature = signature
return self
@@ -251,7 +249,8 @@ class BibTextTransformer:
return self
def return_data(
self, option: Any = None
self,
option: Any = None,
) -> Union[
Optional[BookData],
Optional[RDS_GENERIC_DATA],
@@ -266,6 +265,7 @@ class BibTextTransformer:
Returns:
BookData: a dataclass containing data about the book
"""
if self.data is None:
return None
@@ -311,4 +311,4 @@ if __name__ == "__main__":
link = "CU 8500 K64"
data = WebRequest(71).get_ppn(link).get_data()
bib = BibTextTransformer("ARRAY").get_data().return_data()
log.debug(bib)
bloat.debug("Bib (preview): {}", preview(bib, 1000))

View File

@@ -5,7 +5,7 @@ from pyzotero import zotero
from src import settings
from src.services.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
from src.shared.logging import get_bloat_logger, preview
@dataclass
@@ -215,7 +215,10 @@ class ZoteroController:
def createItem(self, item) -> Optional[str]:
resp = self.zot.create_items([item]) # type: ignore
if "successful" in resp.keys():
log.debug(resp)
bloat = get_bloat_logger()
bloat.debug(
"Zotero create_items response (preview): {}", preview(resp, 1000)
)
return resp["successful"]["0"]["key"]
else:
return None
@@ -229,7 +232,8 @@ class ZoteroController:
break
def createHGSection(self, book: Book, data: dict) -> Optional[str]:
log.debug(book)
bloat = get_bloat_logger()
bloat.debug("Zotero Book payload (preview): {}", preview(book.to_dict(), 1000))
chapter = BookSection()
chapter.assign(book)
chapter.pages = data["pages"]
@@ -247,7 +251,9 @@ class ZoteroController:
]
chapter.creators += authors
log.debug(chapter.to_dict())
bloat.debug(
"Zotero Chapter payload (preview): {}", preview(chapter.to_dict(), 1000)
)
return self.createItem(chapter.to_dict())
pass

View File

@@ -22,7 +22,7 @@ class Settings:
def save_settings(self, config_path: str | Path = "config.yaml") -> None:
"""Save the settings to the config file.
Args:
config_path: Path to the configuration file
"""
@@ -37,10 +37,10 @@ class Settings:
@classmethod
def load_settings(cls, config_path: str | Path = "config.yaml") -> dict[str, Any]:
"""Load the settings from the config file.
Args:
config_path: Path to the configuration file
Returns:
Dictionary containing the loaded settings
"""
@@ -56,10 +56,10 @@ class Settings:
def load_config(config_path: str | Path = "config.yaml") -> dict[str, Any]:
"""Convenience function to load configuration.
Args:
config_path: Path to the configuration file
Returns:
Dictionary containing the loaded settings
"""

View File

@@ -8,18 +8,60 @@ log = loguru.logger
_configured = False
def _preview(obj, max_len: int = 200):
try:
s = repr(obj)
except Exception:
try:
s = str(obj)
except Exception:
s = "<unrepresentable>"
if len(s) > max_len:
return s[:max_len] + "..."
return s
def configure(level: str = "INFO", to_stdout: bool = True, rotate_bytes: str = "1 MB"):
"""Configure the global logger and add application and bloat sinks.
The bloat sink only captures records that have ``extra['bloat']`` set to True.
Use ``get_bloat_logger()`` to obtain a logger bound for large/raw data.
"""
global _configured
if _configured:
return log
log.remove()
if to_stdout:
if to_stdout and sys.stdout is not None:
log.add(sys.stdout, level=level)
# application rolling log
log.add(
f"{LOG_DIR}/application.log",
rotation=rotate_bytes,
retention="10 days",
)
if LOG_DIR is not None:
log.add(
f"{LOG_DIR}/application.log",
rotation=rotate_bytes,
retention="10 days",
)
# separate bloat log for large or verbose payloads
log.add(
f"{LOG_DIR}/bloat.log",
rotation="10 MB",
retention="14 days",
level="DEBUG",
filter=lambda record: record["extra"].get("bloat", False),
)
_configured = True
return log
def get_bloat_logger():
    """Return a logger bound to mark records as bloat.

    Records logged through the returned logger carry ``extra['bloat']=True``
    and are therefore routed to the dedicated bloat sink configured in
    ``configure``.

    Usage:
        bloat = get_bloat_logger()
        bloat.debug(large_payload)
    """
    return log.bind(bloat=True)
def preview(obj, max_len: int = 200):
    """Public preview helper to create truncated representations for logs.

    Thin public wrapper around the module-private ``_preview``; see that
    function for the exact truncation/fallback behavior.
    """
    return _preview(obj, max_len)

View File

@@ -6,3 +6,11 @@ from .transformers import (
RDSData,
RISData,
)
# Explicit re-exports to avoid F401 warnings
RDS_AVAIL_DATA = RDS_AVAIL_DATA
ARRAYData = ARRAYData
BibTeXData = BibTeXData
COinSData = COinSData
RDSData = RDSData
RISData = RISData

Some files were not shown because too many files have changed in this diff Show More