From 3287efbb4b7c58937ec588fc5c6b116c94f04faf Mon Sep 17 00:00:00 2001 From: Juanjo Salvador Date: Sat, 5 Oct 2019 14:50:47 +0200 Subject: [PATCH 01/10] dev improvements --- NyaaPy/nyaa.py | 108 +++++----- NyaaPy/pantsu.py | 80 ++++---- NyaaPy/sukebei.py | 168 +++++++-------- NyaaPy/utils.py | 506 +++++++++++++++++++++++----------------------- tests/test.py | 15 +- 5 files changed, 438 insertions(+), 439 deletions(-) diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index 550985f..8e1dd81 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,54 +1,54 @@ -import requests -import urllib.parse -from bs4 import BeautifulSoup -from NyaaPy.utils import utils - -class Nyaa: - - def __init__(self): - self.URI = "http://nyaa.si" - - def search(self, keyword, **kwargs): - user = kwargs.get('user', None) - category = kwargs.get('category', 0) - subcategory = kwargs.get('subcategory', 0) - filters = kwargs.get('filters', 0) - page = kwargs.get('page', 0) - - if user: - user_uri = "user/{}".format(user) - else: - user_uri = "" - - if page > 0: - r = requests.get("{}/{}?f={}&c={}_{}&q={}&p={}".format( - self.URI, user_uri, filters, category, subcategory, keyword, - page)) - else: - r = requests.get("{}/{}?f={}&c={}_{}&q={}".format( - self.URI, user_uri, filters, category, subcategory, keyword)) - - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=None) - - def get(self, id): - r = requests.get("{}/view/{}".format(self.URI, id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) - - return utils.parse_single(content) - - def get_user(self, username): - r = requests.get("{}/user/{}".format(self.URI, username)) - soup = BeautifulSoup(r.text, 'html.parser') - - return utils.parse_nyaa(soup.select('table tr'), limit=None) - - def news(self, number_of_results): - r = requests.get(self.URI) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=number_of_results + 1) +import requests +import urllib.parse +from bs4 import BeautifulSoup +from NyaaPy import utils + +class Nyaa: + + def __init__(self): + self.URI = "http://nyaa.si" + + def search(self, keyword, **kwargs): + user = kwargs.get('user', None) + category = kwargs.get('category', 0) + subcategory = kwargs.get('subcategory', 0) + filters = kwargs.get('filters', 0) + page = kwargs.get('page', 0) + + if user: + user_uri = "user/{}".format(user) + else: + user_uri = "" + + if page > 0: + r = requests.get("{}/{}?f={}&c={}_{}&q={}&p={}".format( + self.URI, user_uri, filters, category, subcategory, keyword, + page)) + else: + r = requests.get("{}/{}?f={}&c={}_{}&q={}".format( + self.URI, user_uri, filters, category, subcategory, keyword)) + + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_nyaa(rows, limit=None) + + def get(self, id): + r = requests.get("{}/view/{}".format(self.URI, id)) + soup = BeautifulSoup(r.text, 'html.parser') + content = soup.findAll("div", {"class": "panel", "id": None}) + + return utils.parse_single(content) + + def get_user(self, username): + r = requests.get("{}/user/{}".format(self.URI, username)) + soup = BeautifulSoup(r.text, 'html.parser') + + return utils.parse_nyaa(soup.select('table tr'), limit=None) + + def news(self, number_of_results): + r = requests.get(self.URI) + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_nyaa(rows, limit=number_of_results + 1) diff --git a/NyaaPy/pantsu.py b/NyaaPy/pantsu.py index 3caf98a..7557774 100644 --- a/NyaaPy/pantsu.py +++ b/NyaaPy/pantsu.py @@ -1,40 +1,40 @@ -import requests -from NyaaPy.utils import utils - -class Pantsu: - - def __init__(self): - self.BASE_URL = "https://nyaa.pantsu.cat/api" - - # Torrents - GET - def search(self, keyword, **kwargs): - request = requests.get("{}/search{}".format( - self.BASE_URL, utils.query_builder(keyword, kwargs))) - return request.json() - - def view(self, item_id): - request = requests.get("{}/view/{}".format(self.BASE_URL, item_id)) - - return request.json() - - # Torrents - POST - - def upload(self): - return "Work in progress!" - - def update(self): - return "Work in progress!" - - # Users - - def login(self, username, password): - login = requests.post("{}/login/".format( - self.BASE_URL), data={'username': username, 'password': password}) - - return login.json() - - def profile(self, user_id): - profile = requests.post("{}/profile/".format( - self.BASE_URL), data={'id': user_id}) - - return profile.json() +import requests +from NyaaPy import utils + +class Pantsu: + + def __init__(self): + self.BASE_URL = "https://nyaa.pantsu.cat/api" + + # Torrents - GET + def search(self, keyword, **kwargs): + request = requests.get("{}/search{}".format( + self.BASE_URL, utils.query_builder(keyword, kwargs))) + return request.json() + + def view(self, item_id): + request = requests.get("{}/view/{}".format(self.BASE_URL, item_id)) + + return request.json() + + # Torrents - POST + + def upload(self): + return "Work in progress!" + + def update(self): + return "Work in progress!" + + # Users + + def login(self, username, password): + login = requests.post("{}/login/".format( + self.BASE_URL), data={'username': username, 'password': password}) + + return login.json() + + def profile(self, user_id): + profile = requests.post("{}/profile/".format( + self.BASE_URL), data={'id': user_id}) + + return profile.json() diff --git a/NyaaPy/sukebei.py b/NyaaPy/sukebei.py index 7f03b45..6984ebd 100644 --- a/NyaaPy/sukebei.py +++ b/NyaaPy/sukebei.py @@ -1,84 +1,84 @@ -import requests -from bs4 import BeautifulSoup -from NyaaPy.utils import utils - -class SukebeiNyaa: - def search(self, keyword, **kwargs): - category = kwargs.get('category', 0) - subcategory = kwargs.get('subcategory', 0) - filters = kwargs.get('filters', 0) - page = kwargs.get('page', 0) - - if page > 0: - r = requests.get("{}/?f={}&c={}_{}&q={}&p={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, - keyword, page)) - else: - r = requests.get("{}/?f={}&c={}_{}&q={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, - keyword)) - - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=None) - - def get(self, id): - r = requests.get("http://sukebei.nyaa.si/view/{}".format(id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) - - return utils.parse_single(content) - - def get_user(self, username): - r = requests.get("http://sukebei.nyaa.si/user/{}".format(username)) - soup = BeautifulSoup(r.text, 'html.parser') - - return utils.parse_nyaa(soup.select('table tr'), limit=None) - - def news(self, number_of_results): - r = requests.get("http://sukebei.nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_sukebei(rows, limit=number_of_results + 1) - - -class SukebeiPantsu: - BASE_URL = "https://sukebei.pantsu.cat/api" - - # Torrents - GET - def search(self, keyword, **kwargs): - request = requests.get("{}/search{}".format( - SukebeiPantsu.BASE_URL, utils.query_builder(keyword, kwargs))) - - return request.json() - - def view(self, item_id): - request = requests.get("{}/view/{}".format( - SukebeiPantsu.BASE_URL, item_id)) - - return request.json() - - # Torrents - POST - - def upload(self): - return "Work in progress!" - - def update(self): - return "Work in progress!" - - # Users - - def login(self, username, password): - login = requests.post("{}/login/".format( - SukebeiPantsu.BASE_URL), data={'username': username, - 'password': password}) - - return login.json() - - def profile(self, user_id): - profile = requests.post("{}/profile/".format( - SukebeiPantsu.BASE_URL), data={'id': user_id}) - - return profile.json() +import requests +from bs4 import BeautifulSoup +from NyaaPy import utils + +class SukebeiNyaa: + def search(self, keyword, **kwargs): + category = kwargs.get('category', 0) + subcategory = kwargs.get('subcategory', 0) + filters = kwargs.get('filters', 0) + page = kwargs.get('page', 0) + + if page > 0: + r = requests.get("{}/?f={}&c={}_{}&q={}&p={}".format( + "http://sukebei.nyaa.si", filters, category, subcategory, + keyword, page)) + else: + r = requests.get("{}/?f={}&c={}_{}&q={}".format( + "http://sukebei.nyaa.si", filters, category, subcategory, + keyword)) + + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_nyaa(rows, limit=None) + + def get(self, id): + r = requests.get("http://sukebei.nyaa.si/view/{}".format(id)) + soup = BeautifulSoup(r.text, 'html.parser') + content = soup.findAll("div", {"class": "panel", "id": None}) + + return utils.parse_single(content) + + def get_user(self, username): + r = requests.get("http://sukebei.nyaa.si/user/{}".format(username)) + soup = BeautifulSoup(r.text, 'html.parser') + + return utils.parse_nyaa(soup.select('table tr'), limit=None) + + def news(self, number_of_results): + r = requests.get("http://sukebei.nyaa.si/") + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_sukebei(rows, limit=number_of_results + 1) + + +class SukebeiPantsu: + BASE_URL = "https://sukebei.pantsu.cat/api" + + # Torrents - GET + def search(self, keyword, **kwargs): + request = requests.get("{}/search{}".format( + SukebeiPantsu.BASE_URL, utils.query_builder(keyword, kwargs))) + + return request.json() + + def view(self, item_id): + request = requests.get("{}/view/{}".format( + SukebeiPantsu.BASE_URL, item_id)) + + return request.json() + + # Torrents - POST + + def upload(self): + return "Work in progress!" + + def update(self): + return "Work in progress!" + + # Users + + def login(self, username, password): + login = requests.post("{}/login/".format( + SukebeiPantsu.BASE_URL), data={'username': username, + 'password': password}) + + return login.json() + + def profile(self, user_id): + profile = requests.post("{}/profile/".format( + SukebeiPantsu.BASE_URL), data={'id': user_id}) + + return profile.json() diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index 0f418b2..de46e4b 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -1,253 +1,253 @@ -''' - Module utils -''' - -import re - -def nyaa_categories(b): - c = b.replace('/?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Anime", - "subcats": { - "1": "Anime Music Video", - "2": "English-translated", - "3": "Non-English-translated", - "4": "Raw" - } - }, - "2": { - "name": "Audio", - "subcats": { - "1": "Lossless", - "2": "Lossy" - } - }, - "3": { - "name": "Literature", - "subcats": { - "1": "English-translated", - "2": "Non-English-translated", - "3": "Raw" - } - }, - "4": { - "name": "Live Action", - "subcats": { - "1": "English-translated", - "2": "Idol/Promotional Video", - "3": "Non-English-translated", - "4": "Raw" - } - }, - "5": { - "name": "Pictures", - "subcats": { - "1": "Graphics", - "2": "Photos" - } - }, - "6": { - "name": "Software", - "subcats": { - "1": "Applications", - "2": "Games" - } - } - } - - try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass - - return category_name - -def parse_nyaa(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - if row.has_attr('class'): - if row['class'][0] == 'danger': - block.append("remake") - elif row['class'][0] == 'success': - block.append("trusted") - else: - block.append("default") - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': nyaa_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - 'type': block[11], - } - - torrents.append(torrent) - except IndexError as ie: - pass - - return torrents - -def parse_single(content): - torrent = {} - data = [] - torrent_files = [] - - for row in content[0].find_all('div', {'class': 'row'}): - for div in row.find_all('div', {'class': 'col-md-5'}): - data.append(div.text.replace("\n", "")) - - files = content[2].find('div', - {'class', 'torrent-file-list'}).find_all('li') - - for file in files: - torrent_files.append(file.text) - - torrent['title'] = re.sub('\n|\r|\t', '', content[0].find('h3', { - "class": "panel-title"}).text.replace("\n", "")) - torrent['category'] = data[0] - torrent['uploader'] = data[2] - torrent['uploader_profile'] = "https://nyaa.si/user/{}".format(data[2]) - torrent['website'] = re.sub('\t', '', data[4]) - torrent['size'] = data[6] - torrent['date'] = data[1] - torrent['seeders'] = data[3] - torrent['leechers'] = data[5] - torrent['completed'] = data[7] - torrent['hash'] = data[8] - torrent['description'] = re.sub('\t', '', content[1].find('div', { - 'id': 'torrent-description'}).text) - torrent['files'] = torrent_files - - return torrent - -def parse_sukebei(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - block.append(link.text.rstrip()) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': sukebei_categories(block[0]), - 'url': "http://sukebei.nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://sukebei.nyaa.si{}".format( - block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - except IndexError as ie: - pass - - torrents.append(torrent) - - return torrents - -def sukebei_categories(b): - c = b.replace('/?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Art", - "subcats": { - "1": "Anime", - "2": "Doujinshi", - "3": "Games", - "4": "Manga", - "5": "Pictures", - } - }, - "2": { - "name": "Real Life", - "subcats": { - "1": "Photobooks & Pictures", - "2": "Videos" - } - } - } - - try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass - - return category_name - -# Pantsu Utils -def query_builder(q, params): - available_params = ["category", "page", "limit", "userID", "fromID", - "status", "maxage", "toDate", "fromDate", - "dateType", "minSize", "maxSize", "sizeType", - "sort", "order", "lang"] - query = "?q={}".format(q.replace(" ", "+")) - - for param, value in params.items(): - if param in available_params: - if (param != "category" and param != "status" and - param != "lang"): - query += "&{}={}".format(param, value) - elif param == "category": - query += "&c={}_{}".format(value[0], value[1]) - - elif param == "status": - query += "&s={}".format(value) - - elif param == "lang": - for lang in value: - query += "&lang={}".format(lang) - - return query +''' + Module utils +''' + +import re + +def nyaa_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Anime", + "subcats": { + "1": "Anime Music Video", + "2": "English-translated", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "2": { + "name": "Audio", + "subcats": { + "1": "Lossless", + "2": "Lossy" + } + }, + "3": { + "name": "Literature", + "subcats": { + "1": "English-translated", + "2": "Non-English-translated", + "3": "Raw" + } + }, + "4": { + "name": "Live Action", + "subcats": { + "1": "English-translated", + "2": "Idol/Promotional Video", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "5": { + "name": "Pictures", + "subcats": { + "1": "Graphics", + "2": "Photos" + } + }, + "6": { + "name": "Software", + "subcats": { + "1": "Applications", + "2": "Games" + } + } + } + + try: + category_name = "{} - {}".format( + categories[cat]['name'], categories[cat]['subcats'][subcat]) + except Exception: + pass + + return category_name + +def parse_nyaa(table_rows, limit): + if limit == 0: + limit = len(table_rows) + + torrents = [] + + for row in table_rows[:limit]: + block = [] + + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + if row.has_attr('class'): + if row['class'][0] == 'danger': + block.append("remake") + elif row['class'][0] == 'success': + block.append("trusted") + else: + block.append("default") + + try: + torrent = { + 'id': block[1].replace("/view/", ""), + 'category': nyaa_categories(block[0]), + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + 'type': block[11], + } + + torrents.append(torrent) + except IndexError as ie: + pass + + return torrents + +def parse_single(content): + torrent = {} + data = [] + torrent_files = [] + + for row in content[0].find_all('div', {'class': 'row'}): + for div in row.find_all('div', {'class': 'col-md-5'}): + data.append(div.text.replace("\n", "")) + + files = content[2].find('div', + {'class', 'torrent-file-list'}).find_all('li') + + for file in files: + torrent_files.append(file.text) + + torrent['title'] = re.sub('\n|\r|\t', '', content[0].find('h3', { + "class": "panel-title"}).text.replace("\n", "")) + torrent['category'] = data[0] + torrent['uploader'] = data[2] + torrent['uploader_profile'] = "https://nyaa.si/user/{}".format(data[2]) + torrent['website'] = re.sub('\t', '', data[4]) + torrent['size'] = data[6] + torrent['date'] = data[1] + torrent['seeders'] = data[3] + torrent['leechers'] = data[5] + torrent['completed'] = data[7] + torrent['hash'] = data[8] + torrent['description'] = re.sub('\t', '', content[1].find('div', { + 'id': 'torrent-description'}).text) + torrent['files'] = torrent_files + + return torrent + +def parse_sukebei(table_rows, limit): + if limit == 0: + limit = len(table_rows) + + torrents = [] + + for row in table_rows[:limit]: + block = [] + + for td in row.find_all('td'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + block.append(link.text.rstrip()) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'id': block[1].replace("/view/", ""), + 'category': sukebei_categories(block[0]), + 'url': "http://sukebei.nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://sukebei.nyaa.si{}".format( + block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + except IndexError as ie: + pass + + torrents.append(torrent) + + return torrents + +def sukebei_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Art", + "subcats": { + "1": "Anime", + "2": "Doujinshi", + "3": "Games", + "4": "Manga", + "5": "Pictures", + } + }, + "2": { + "name": "Real Life", + "subcats": { + "1": "Photobooks & Pictures", + "2": "Videos" + } + } + } + + try: + category_name = "{} - {}".format( + categories[cat]['name'], categories[cat]['subcats'][subcat]) + except Exception: + pass + + return category_name + +# Pantsu Utils +def query_builder(q, params): + available_params = ["category", "page", "limit", "userID", "fromID", + "status", "maxage", "toDate", "fromDate", + "dateType", "minSize", "maxSize", "sizeType", + "sort", "order", "lang"] + query = "?q={}".format(q.replace(" ", "+")) + + for param, value in params.items(): + if param in available_params: + if (param != "category" and param != "status" and + param != "lang"): + query += "&{}={}".format(param, value) + elif param == "category": + query += "&c={}_{}".format(value[0], value[1]) + + elif param == "status": + query += "&s={}".format(value) + + elif param == "lang": + for lang in value: + query += "&lang={}".format(lang) + + return query diff --git a/tests/test.py b/tests/test.py index 0455e34..bed0c60 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,8 +1,7 @@ -from NyaaPy import Pantsu, Nyaa - -pantsu = Pantsu() -nyaa = Nyaa() - -# print(pantsu.search(keyword='koe no katachi', -# lang=["es", "ja"], category=[1, 3])) -print(nyaa.search(keyword='yuru camp')) +from NyaaPy import Pantsu, Nyaa + +pantsu = Pantsu() +nyaa = Nyaa() + +print(pantsu.search(keyword='koe no katachi', + lang=["es", "ja"], category=[1, 3])) \ No newline at end of file From 03f030914b333a3dbe99822650efee4fcc33d2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Mon, 13 Jan 2020 23:11:55 +0100 Subject: [PATCH 02/10] LXML based Nyaa parsing demo ready, DO NOT USE IT --- NyaaPy/nyaa.py | 16 ++- NyaaPy/utils_lxml.py | 265 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + tests/test.py | 8 +- 4 files changed, 283 insertions(+), 7 deletions(-) create mode 100644 NyaaPy/utils_lxml.py diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index d916e06..aaf3275 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,7 +1,7 @@ import requests import urllib.parse -from bs4 import BeautifulSoup -from NyaaPy import utils +from NyaaPy import utils_lxml + class Nyaa: @@ -10,11 +10,16 @@ class Nyaa: def last_uploads(self, number_of_results): r = requests.get(self.URI) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - return utils.parse_nyaa(table_rows=rows, limit=number_of_results + 1) + # If anything up with nyaa servers let the user know. + r.raise_for_status() + return utils_lxml.parse_nyaa( + request_text=r.text, + limit=number_of_results + 1 + ) + +""" def search(self, keyword, **kwargs): user = kwargs.get('user', None) category = kwargs.get('category', 0) @@ -52,3 +57,4 @@ class Nyaa: soup = BeautifulSoup(r.text, 'html.parser') return utils.parse_nyaa(soup.select('table tr'), limit=None) +""" diff --git a/NyaaPy/utils_lxml.py b/NyaaPy/utils_lxml.py new file mode 100644 index 0000000..f7ea11a --- /dev/null +++ b/NyaaPy/utils_lxml.py @@ -0,0 +1,265 @@ +''' + Module utils +''' + +import re +from lxml import etree +from pprint import pprint + + +def nyaa_categories(b): + c = b.replace('?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Anime", + "subcats": { + "1": "Anime Music Video", + "2": "English-translated", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "2": { + "name": "Audio", + "subcats": { + "1": "Lossless", + "2": "Lossy" + } + }, + "3": { + "name": "Literature", + "subcats": { + "1": "English-translated", + "2": "Non-English-translated", + "3": "Raw" + } + }, + "4": { + "name": "Live Action", + "subcats": { + "1": "English-translated", + "2": "Idol/Promotional Video", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "5": { + "name": "Pictures", + "subcats": { + "1": "Graphics", + "2": "Photos" + } + }, + "6": { + "name": "Software", + "subcats": { + "1": "Applications", + "2": "Games" + } + } + } + + try: + category_name = "{} - {}".format( + categories[cat]['name'], categories[cat]['subcats'][subcat]) + except Exception: + pass + + return category_name + + +def parse_nyaa(request_text, limit): + parser = etree.HTMLParser() + tree = etree.fromstring(request_text, parser) + + torrents = [] + + # Going through table rows + for tr in tree.xpath("//tbody//tr")[:limit]: + block = [] + + # Find basic torrent data + for td in tr.xpath("./td"): + for link in td.xpath("./a"): + block.append(link.attrib.get("href").split('/')[-1]) + + if link.text and link.text.rstrip(): + block.append(link.text) + + if td.text and td.text.rstrip(): + block.append(td.text) + + # Add type of torrent based on tr class. + if 'danger' in tr.attrib.get("class"): + block.append("remake") + elif 'success' in tr.attrib.get("class"): + block.append("trusted") + else: + block.append("default") + + # Create torrent object + try: + torrent = { + 'id': block[1], + 'category': nyaa_categories(block[0]), + 'url': "https://nyaa.si/view/{}".format(block[1]), + 'name': block[2], + 'download_url': "https://nyaa.si/download/{}".format(block[3]), + 'magnet': block[4], + 'size': block[5], + 'date': block[6], + 'seeders': block[7], + 'leechers': block[8], + 'completed_downloads': block[9], + 'type': block[10] + } + torrents.append(torrent) + except IndexError: + pass + return torrents + + +# TODO: Parse single is not done yet. +def parse_single(content): + torrent = {} + data = [] + torrent_files = [] + + for row in content[0].find_all('div', {'class': 'row'}): + for div in row.find_all('div', {'class': 'col-md-5'}): + data.append(div.text.replace("\n", "")) + + files = content[2].find('div', + {'class', 'torrent-file-list'}).find_all('li') + + for file in files: + torrent_files.append(file.text) + + torrent['title'] = re.sub('\n|\r|\t', '', content[0].find('h3', { + "class": "panel-title"}).text.replace("\n", "")) + torrent['category'] = data[0] + torrent['uploader'] = data[2] + torrent['uploader_profile'] = "https://nyaa.si/user/{}".format(data[2]) + torrent['website'] = re.sub('\t', '', data[4]) + torrent['size'] = data[6] + torrent['date'] = data[1] + torrent['seeders'] = data[3] + torrent['leechers'] = data[5] + torrent['completed'] = data[7] + torrent['hash'] = data[8] + torrent['description'] = re.sub('\t', '', content[1].find('div', { + 'id': 'torrent-description'}).text) + torrent['files'] = torrent_files + + return torrent + + +# TODO: Not ready +def parse_sukebei(table_rows, limit): + if limit == 0: + limit = len(table_rows) + + torrents = [] + + for row in table_rows[:limit]: + block = [] + + for td in row.find_all('td'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + block.append(link.text.rstrip()) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'id': block[1].replace("/view/", ""), + 'category': sukebei_categories(block[0]), + 'url': "http://sukebei.nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://sukebei.nyaa.si{}".format( + block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + except IndexError as ie: + pass + + torrents.append(torrent) + + return torrents + + +# TODO Not ready +def sukebei_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Art", + "subcats": { + "1": "Anime", + "2": "Doujinshi", + "3": "Games", + "4": "Manga", + "5": "Pictures", + } + }, + "2": { + "name": "Real Life", + "subcats": { + "1": "Photobooks & Pictures", + "2": "Videos" + } + } + } + + try: + category_name = "{} - {}".format( + categories[cat]['name'], categories[cat]['subcats'][subcat]) + except Exception: + pass + + return category_name + + +# TODO: Not tested +# Pantsu Utils +def query_builder(q, params): + available_params = ["category", "page", "limit", "userID", "fromID", + "status", "maxage", "toDate", "fromDate", + "dateType", "minSize", "maxSize", "sizeType", + "sort", "order", "lang"] + query = "?q={}".format(q.replace(" ", "+")) + + for param, value in params.items(): + if param in available_params: + if (param != "category" and param != "status" and + param != "lang"): + query += "&{}={}".format(param, value) + elif param == "category": + query += "&c={}_{}".format(value[0], value[1]) + + elif param == "status": + query += "&s={}".format(value) + + elif param == "lang": + for lang in value: + query += "&lang={}".format(lang) + + return query diff --git a/requirements.txt b/requirements.txt index ef3a347..19198a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests>=2.20.0 beautifulsoup4==4.6.0 +lxml \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index bed0c60..233f817 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,7 +1,11 @@ from NyaaPy import Pantsu, Nyaa +from pprint import pprint -pantsu = Pantsu() +# pantsu = Pantsu() nyaa = Nyaa() +pprint(nyaa.last_uploads(5)) +""" print(pantsu.search(keyword='koe no katachi', - lang=["es", "ja"], category=[1, 3])) \ No newline at end of file + lang=["es", "ja"], category=[1, 3])) +""" From 95b967b940f3fe528063083a55fbf8ce0a0f4f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Wed, 15 Jan 2020 21:05:22 +0100 Subject: [PATCH 03/10] Nyaa.si parser converted to LXML (except view user info) --- NyaaPy/nyaa.py | 20 ++-- NyaaPy/utils.py | 132 ++++++++++++--------- NyaaPy/utils_lxml.py | 265 ------------------------------------------- tests/test.py | 15 ++- 4 files changed, 99 insertions(+), 333 deletions(-) delete mode 100644 NyaaPy/utils_lxml.py diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index aaf3275..9165c37 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,6 +1,6 @@ import requests import urllib.parse -from NyaaPy import utils_lxml +from NyaaPy import utils class Nyaa: @@ -14,12 +14,11 @@ class Nyaa: # If anything up with nyaa servers let the user know. r.raise_for_status() - return utils_lxml.parse_nyaa( + return utils.parse_nyaa( request_text=r.text, limit=number_of_results + 1 ) -""" def search(self, keyword, **kwargs): user = kwargs.get('user', None) category = kwargs.get('category', 0) @@ -40,21 +39,20 @@ class Nyaa: r = requests.get("{}/{}?f={}&c={}_{}&q={}".format( self.URI, user_uri, filters, category, subcategory, keyword)) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') + r.raise_for_status() - return utils.parse_nyaa(rows, limit=None) + return utils.parse_nyaa(request_text=r.text, limit=None) def get(self, id): r = requests.get("{}/view/{}".format(self.URI, id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) - - return utils.parse_single(content) + r.raise_for_status() + # ! Description not working TODO + # with open("test.html", "w") as f: + # f.write(r.text) + return utils.parse_single(request_text=r.text) def get_user(self, username): r = requests.get("{}/user/{}".format(self.URI, username)) soup = BeautifulSoup(r.text, 'html.parser') return utils.parse_nyaa(soup.select('table tr'), limit=None) -""" diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index de46e4b..fbbbdf0 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -3,9 +3,12 @@ ''' import re +from lxml import etree +from pprint import pprint + def nyaa_categories(b): - c = b.replace('/?c=', '') + c = b.replace('?c=', '') cats = c.split('_') cat = cats[0] @@ -69,89 +72,104 @@ def nyaa_categories(b): return category_name -def parse_nyaa(table_rows, limit): - if limit == 0: - limit = len(table_rows) + +def parse_nyaa(request_text, limit): + parser = etree.HTMLParser() + tree = etree.fromstring(request_text, parser) torrents = [] - for row in table_rows[:limit]: + # Going through table rows + for tr in tree.xpath("//tbody//tr")[:limit]: block = [] - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) + for td in tr.xpath("./td"): + for link in td.xpath("./a"): - if td.text.rstrip(): - block.append(td.text.rstrip()) + href = link.attrib.get("href").split('/')[-1] - if row.has_attr('class'): - if row['class'][0] == 'danger': - block.append("remake") - elif row['class'][0] == 'success': - block.append("trusted") - else: - block.append("default") + # Only caring about non-comment pages. + if href[-9:] != "#comments": + block.append(href) + if link.text and link.text.strip(): + block.append(link.text.strip()) + + if td.text and td.text.strip(): + block.append(td.text.strip()) + + # Add type of torrent based on tr class. + if 'danger' in tr.attrib.get("class"): + block.append("remake") + elif 'success' in tr.attrib.get("class"): + block.append("trusted") + else: + block.append("default") + + # Create torrent object try: torrent = { - 'id': block[1].replace("/view/", ""), + 'id': block[1], 'category': nyaa_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), + 'url': "https://nyaa.si/view/{}".format(block[1]), 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - 'type': block[11], + 'download_url': "https://nyaa.si/download/{}".format(block[3]), + 'magnet': block[4], + 'size': block[5], + 'date': block[6], + 'seeders': block[7], + 'leechers': block[8], + 'completed_downloads': block[9], + 'type': block[10] } - torrents.append(torrent) - except IndexError as ie: + except IndexError: pass - return torrents -def parse_single(content): + +def parse_single(request_text): + parser = etree.HTMLParser() + tree = etree.fromstring(request_text, parser) + torrent = {} data = [] torrent_files = [] - for row in content[0].find_all('div', {'class': 'row'}): - for div in row.find_all('div', {'class': 'col-md-5'}): - data.append(div.text.replace("\n", "")) + # Find basic uploader info & torrent stats + for row in tree.xpath("//div[@class='row']"): + for div_text in row.xpath("./div[@class='col-md-5']//text()"): + d = div_text.strip() + if d: + data.append(d) - files = content[2].find('div', - {'class', 'torrent-file-list'}).find_all('li') + # Find files, we need only text of the li element(s). + # Sorry about Pycodestyle aka PEP8 (E501) error + for el in tree.xpath("//div[contains(@class, 'torrent-file-list')]//li/text()"): + if el.rstrip(): + torrent_files.append(el) - for file in files: - torrent_files.append(file.text) - - torrent['title'] = re.sub('\n|\r|\t', '', content[0].find('h3', { - "class": "panel-title"}).text.replace("\n", "")) + torrent['title'] = \ + tree.xpath("//h3[@class='panel-title']/text()")[0].strip() torrent['category'] = data[0] - torrent['uploader'] = data[2] - torrent['uploader_profile'] = "https://nyaa.si/user/{}".format(data[2]) - torrent['website'] = re.sub('\t', '', data[4]) - torrent['size'] = data[6] - torrent['date'] = data[1] - torrent['seeders'] = data[3] - torrent['leechers'] = data[5] - torrent['completed'] = data[7] - torrent['hash'] = data[8] - torrent['description'] = re.sub('\t', '', content[1].find('div', { - 'id': 'torrent-description'}).text) + torrent['uploader'] = data[4] + torrent['uploader_profile'] = "http://nyaa.si/user/{}".format(data[4]) + torrent['website'] = data[6] + torrent['size'] = data[8] + torrent['date'] = data[3] + torrent['seeders'] = data[5] + torrent['leechers'] = data[7] + torrent['completed'] = data[9] + torrent['hash'] = data[10] torrent['files'] = torrent_files + torrent['description'] = "" + for s in tree.xpath("//div[@id='torrent-description']"): + torrent['description'] += s.text + return torrent + def parse_sukebei(table_rows, limit): if limit == 0: limit = len(table_rows) @@ -192,6 +210,7 @@ def parse_sukebei(table_rows, limit): return torrents + def sukebei_categories(b): c = b.replace('/?c=', '') cats = c.split('_') @@ -227,6 +246,7 @@ def sukebei_categories(b): return category_name + # Pantsu Utils def query_builder(q, params): available_params = ["category", "page", "limit", "userID", "fromID", diff --git a/NyaaPy/utils_lxml.py b/NyaaPy/utils_lxml.py deleted file mode 100644 index f7ea11a..0000000 --- a/NyaaPy/utils_lxml.py +++ /dev/null @@ -1,265 +0,0 @@ -''' - Module utils -''' - -import re -from lxml import etree -from pprint import pprint - - -def nyaa_categories(b): - c = b.replace('?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Anime", - "subcats": { - "1": "Anime Music Video", - "2": "English-translated", - "3": "Non-English-translated", - "4": "Raw" - } - }, - "2": { - "name": "Audio", - "subcats": { - "1": "Lossless", - "2": "Lossy" - } - }, - "3": { - "name": "Literature", - "subcats": { - "1": "English-translated", - "2": "Non-English-translated", - "3": "Raw" - } - }, - "4": { - "name": "Live Action", - "subcats": { - "1": "English-translated", - "2": "Idol/Promotional Video", - "3": "Non-English-translated", - "4": "Raw" - } - }, - "5": { - "name": "Pictures", - "subcats": { - "1": "Graphics", - "2": "Photos" - } - }, - "6": { - "name": "Software", - "subcats": { - "1": "Applications", - "2": "Games" - } - } - } - - try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass - - return category_name - - -def parse_nyaa(request_text, limit): - parser = etree.HTMLParser() - tree = etree.fromstring(request_text, parser) - - torrents = [] - - # Going through table rows - for tr in tree.xpath("//tbody//tr")[:limit]: - block = [] - - # Find basic torrent data - for td in tr.xpath("./td"): - for link in td.xpath("./a"): - block.append(link.attrib.get("href").split('/')[-1]) - - if link.text and link.text.rstrip(): - block.append(link.text) - - if td.text and td.text.rstrip(): - block.append(td.text) - - # Add type of torrent based on tr class. - if 'danger' in tr.attrib.get("class"): - block.append("remake") - elif 'success' in tr.attrib.get("class"): - block.append("trusted") - else: - block.append("default") - - # Create torrent object - try: - torrent = { - 'id': block[1], - 'category': nyaa_categories(block[0]), - 'url': "https://nyaa.si/view/{}".format(block[1]), - 'name': block[2], - 'download_url': "https://nyaa.si/download/{}".format(block[3]), - 'magnet': block[4], - 'size': block[5], - 'date': block[6], - 'seeders': block[7], - 'leechers': block[8], - 'completed_downloads': block[9], - 'type': block[10] - } - torrents.append(torrent) - except IndexError: - pass - return torrents - - -# TODO: Parse single is not done yet. -def parse_single(content): - torrent = {} - data = [] - torrent_files = [] - - for row in content[0].find_all('div', {'class': 'row'}): - for div in row.find_all('div', {'class': 'col-md-5'}): - data.append(div.text.replace("\n", "")) - - files = content[2].find('div', - {'class', 'torrent-file-list'}).find_all('li') - - for file in files: - torrent_files.append(file.text) - - torrent['title'] = re.sub('\n|\r|\t', '', content[0].find('h3', { - "class": "panel-title"}).text.replace("\n", "")) - torrent['category'] = data[0] - torrent['uploader'] = data[2] - torrent['uploader_profile'] = "https://nyaa.si/user/{}".format(data[2]) - torrent['website'] = re.sub('\t', '', data[4]) - torrent['size'] = data[6] - torrent['date'] = data[1] - torrent['seeders'] = data[3] - torrent['leechers'] = data[5] - torrent['completed'] = data[7] - torrent['hash'] = data[8] - torrent['description'] = re.sub('\t', '', content[1].find('div', { - 'id': 'torrent-description'}).text) - torrent['files'] = torrent_files - - return torrent - - -# TODO: Not ready -def parse_sukebei(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - block.append(link.text.rstrip()) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': sukebei_categories(block[0]), - 'url': "http://sukebei.nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://sukebei.nyaa.si{}".format( - block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - except IndexError as ie: - pass - - torrents.append(torrent) - - return torrents - - -# TODO Not ready -def sukebei_categories(b): - c = b.replace('/?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Art", - "subcats": { - "1": "Anime", - "2": "Doujinshi", - "3": "Games", - "4": "Manga", - "5": "Pictures", - } - }, - "2": { - "name": "Real Life", - "subcats": { - "1": "Photobooks & Pictures", - "2": "Videos" - } - } - } - - try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass - - return category_name - - -# TODO: Not tested -# Pantsu Utils -def query_builder(q, params): - available_params = ["category", "page", "limit", "userID", "fromID", - "status", "maxage", "toDate", "fromDate", - "dateType", "minSize", "maxSize", "sizeType", - "sort", "order", "lang"] - query = "?q={}".format(q.replace(" ", "+")) - - for param, value in params.items(): - if param in available_params: - if (param != "category" and param != "status" and - param != "lang"): - query += "&{}={}".format(param, value) - elif param == "category": - query += "&c={}_{}".format(value[0], value[1]) - - elif param == "status": - query += "&s={}".format(value) - - elif param == "lang": - for lang in value: - query += "&lang={}".format(lang) - - return query diff --git a/tests/test.py b/tests/test.py index 233f817..caf5875 100644 --- a/tests/test.py +++ b/tests/test.py @@ -3,7 +3,20 @@ from pprint import pprint # pantsu = Pantsu() nyaa = Nyaa() -pprint(nyaa.last_uploads(5)) + +# Get fresh torrents +print("Latest torrents:") +latest_torrents = rnyaa.last_uploads(5) + +# I'd like to watch Tenki no ko, but not uploaded yet. +print("Search results for Kimi no Na wa:") +test_search = nyaa.search("Kimi no Na wa") +pprint(test_search) + +# Get first torrent from found torrents +print("First result torrent info:") +single_torrent = nyaa.get(test_search[0]["id"]) +pprint(single_torrent) """ print(pantsu.search(keyword='koe no katachi', From bf01a922f0cddcec3e1105abadc24b8d9e277d51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Wed, 22 Jan 2020 19:07:38 +0100 Subject: [PATCH 04/10] Made few tests --- NyaaPy/nyaa.py | 8 +++----- NyaaPy/utils.py | 1 - tests/test.py | 42 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index 9165c37..c4ec563 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -46,13 +46,11 @@ class Nyaa: def get(self, id): r = requests.get("{}/view/{}".format(self.URI, id)) r.raise_for_status() - # ! Description not working TODO - # with open("test.html", "w") as f: - # f.write(r.text) + return utils.parse_single(request_text=r.text) def get_user(self, username): r = requests.get("{}/user/{}".format(self.URI, username)) - soup = BeautifulSoup(r.text, 'html.parser') + r.raise_for_status() - return utils.parse_nyaa(soup.select('table tr'), limit=None) + return utils.parse_nyaa(request_text=r.text, limit=None) diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index fbbbdf0..0c837b1 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -4,7 +4,6 @@ import re from lxml import etree -from pprint import pprint def nyaa_categories(b): diff --git a/tests/test.py b/tests/test.py index caf5875..2a5f5a4 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,22 +1,52 @@ from NyaaPy import Pantsu, Nyaa from pprint import pprint +from datetime import datetime # pantsu = Pantsu() nyaa = Nyaa() # Get fresh torrents -print("Latest torrents:") -latest_torrents = rnyaa.last_uploads(5) +dt_latest_torrents_begin = datetime.now() +latest_torrents = nyaa.last_uploads(100) +dt_latest_torrents_end = datetime.now() # I'd like to watch Tenki no ko, but not uploaded yet. -print("Search results for Kimi no Na wa:") +dt_search_begin = datetime.now() test_search = nyaa.search("Kimi no Na wa") -pprint(test_search) +dt_search_end = datetime.now() +# pprint(test_search) # Get first torrent from found torrents -print("First result torrent info:") +# print("First result torrent info:") +dt_single_torrent_begin = datetime.now() single_torrent = nyaa.get(test_search[0]["id"]) -pprint(single_torrent) +dt_single_torrent_end = datetime.now() +#pprint(single_torrent) + +dt_user_begin = datetime.now() +user_torrents = nyaa.get_user("Lilith-Raws") +dt_user_end = datetime.now() +#pprint(user_torrents) + +print( + "Latest torrents time:", + (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, + "msec") +print( + "Test search time:", + (dt_search_end - dt_search_begin).microseconds/ 1000, + "msec" +) +print( + "Single torrent time:", + (dt_single_torrent_end - dt_single_torrent_begin).microseconds / 1000, + "msec" +) +print( + "Single user time:", + (dt_user_end - dt_user_begin ).microseconds / 1000, + "msec" +) """ print(pantsu.search(keyword='koe no katachi', From 5c93e516ba364d448335ebdc6989f8e1724c88c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Sat, 1 Feb 2020 15:11:35 +0100 Subject: [PATCH 05/10] Nyaa.si & sukebei.nyaa.si LXML fully ready --- .gitignore | 3 +- NyaaPy/nyaa.py | 3 +- NyaaPy/sukebei.py | 42 +++++++++++++++------------- NyaaPy/utils.py | 65 +++++++++++-------------------------------- tests/test.py | 40 +++++++++++++++----------- tests/test_sukebei.py | 60 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 126 insertions(+), 87 deletions(-) create mode 100644 tests/test_sukebei.py diff --git a/.gitignore b/.gitignore index 6f7eb31..39ed182 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ dist/ nyaapy.egg-info .vscode env/ -*.pyc \ No newline at end of file +*.pyc +test_files \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index c4ec563..3719089 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,12 +1,11 @@ import requests -import urllib.parse from NyaaPy import utils class Nyaa: def __init__(self): - self.URI = "http://nyaa.si" + self.URI = "https://nyaa.si" def last_uploads(self, number_of_results): r = requests.get(self.URI) diff --git a/NyaaPy/sukebei.py b/NyaaPy/sukebei.py index d0223c2..40340a3 100644 --- a/NyaaPy/sukebei.py +++ b/NyaaPy/sukebei.py @@ -1,8 +1,12 @@ import requests -from bs4 import BeautifulSoup from NyaaPy import utils + class SukebeiNyaa: + + def __init__(self): + self.URI = "https://sukebei.nyaa.si" + def search(self, keyword, **kwargs): category = kwargs.get('category', 0) subcategory = kwargs.get('subcategory', 0) @@ -11,37 +15,37 @@ class SukebeiNyaa: if page > 0: r = requests.get("{}/?f={}&c={}_{}&q={}&p={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + self.URI, filters, category, subcategory, keyword, page)) else: r = requests.get("{}/?f={}&c={}_{}&q={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + self.URI, filters, category, subcategory, keyword)) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=None) + r.raise_for_status() + return utils.parse_nyaa(r.text, limit=None, sukebei=True) def get(self, id): - r = requests.get("http://sukebei.nyaa.si/view/{}".format(id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) + r = requests.get("{}/view/{}".format(self.URI, id)) + r.raise_for_status() - return utils.parse_single(content) + return utils.parse_single(r.text, sukebei=True) def get_user(self, username): - r = requests.get("http://sukebei.nyaa.si/user/{}".format(username)) - soup = BeautifulSoup(r.text, 'html.parser') + r = requests.get("{}/user/{}".format(self.URI, username)) + r.raise_for_status() - return utils.parse_nyaa(soup.select('table tr'), limit=None) + return utils.parse_nyaa(r.text, limit=None, sukebei=True) - def news(self, number_of_results): - r = requests.get("http://sukebei.nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') + def last_uploads(self, number_of_results): + r = requests.get(self.URI) + r.raise_for_status() - return utils.parse_sukebei(rows, limit=number_of_results + 1) + return utils.parse_nyaa( + r.text, + limit=number_of_results + 1, + sukebei=True + ) class SukebeiPantsu: diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index 0c837b1..e221761 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -72,10 +72,15 @@ def nyaa_categories(b): return category_name -def parse_nyaa(request_text, limit): +def parse_nyaa(request_text, limit, sukebei=False): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + if sukebei is False: + uri = "https://nyaa.si" + else: + uri = "https://sukebei.nyaa.si" + torrents = [] # Going through table rows @@ -109,10 +114,10 @@ def parse_nyaa(request_text, limit): try: torrent = { 'id': block[1], - 'category': nyaa_categories(block[0]), - 'url': "https://nyaa.si/view/{}".format(block[1]), + 'category': nyaa_categories(block[0]) if sukebei is False else sukebei_categories(block[0]), + 'url': "{}/view/{}".format(uri, block[1]), 'name': block[2], - 'download_url': "https://nyaa.si/download/{}".format(block[3]), + 'download_url': "{}/download/{}".format(uri, block[3]), 'magnet': block[4], 'size': block[5], 'date': block[6], @@ -127,10 +132,15 @@ def parse_nyaa(request_text, limit): return torrents -def parse_single(request_text): +def parse_single(request_text, sukebei=False): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + if sukebei is False: + uri = "https://nyaa.si" + else: + uri = "https://sukebei.nyaa.si" + torrent = {} data = [] torrent_files = [] @@ -152,7 +162,7 @@ def parse_single(request_text): tree.xpath("//h3[@class='panel-title']/text()")[0].strip() torrent['category'] = data[0] torrent['uploader'] = data[4] - torrent['uploader_profile'] = "http://nyaa.si/user/{}".format(data[4]) + torrent['uploader_profile'] = "{}/user/{}".format(uri, data[4]) torrent['website'] = data[6] torrent['size'] = data[8] torrent['date'] = data[3] @@ -169,49 +179,8 @@ def parse_single(request_text): return torrent -def parse_sukebei(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - block.append(link.text.rstrip()) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': sukebei_categories(block[0]), - 'url': "http://sukebei.nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://sukebei.nyaa.si{}".format( - block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - except IndexError as ie: - pass - - torrents.append(torrent) - - return torrents - - def sukebei_categories(b): - c = b.replace('/?c=', '') + c = b.replace('?c=', '') cats = c.split('_') cat = cats[0] diff --git a/tests/test.py b/tests/test.py index 2a5f5a4..8b9215e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,40 +1,51 @@ -from NyaaPy import Pantsu, Nyaa +from NyaaPy import Nyaa from pprint import pprint from datetime import datetime +import json +import sys +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") -# pantsu = Pantsu() nyaa = Nyaa() # Get fresh torrents dt_latest_torrents_begin = datetime.now() latest_torrents = nyaa.last_uploads(100) dt_latest_torrents_end = datetime.now() +with open("test_files/nyaa_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) -# I'd like to watch Tenki no ko, but not uploaded yet. +# Search some nasty stuff dt_search_begin = datetime.now() -test_search = nyaa.search("Kimi no Na wa") +test_search = nyaa.search("kimi no na wa") dt_search_end = datetime.now() -# pprint(test_search) +with open("test_files/nyaa_search_test.json", 'w') as f: + json.dump(test_search, f) # Get first torrent from found torrents -# print("First result torrent info:") dt_single_torrent_begin = datetime.now() single_torrent = nyaa.get(test_search[0]["id"]) dt_single_torrent_end = datetime.now() -#pprint(single_torrent) +with open("test_files/nyaa_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) dt_user_begin = datetime.now() -user_torrents = nyaa.get_user("Lilith-Raws") +user_torrents = nyaa.get_user("HorribleSubs") dt_user_end = datetime.now() -#pprint(user_torrents) +with open("test_files/nyaa_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) print( "Latest torrents time:", (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, "msec") print( - "Test search time:", - (dt_search_end - dt_search_begin).microseconds/ 1000, + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, "msec" ) print( @@ -44,11 +55,6 @@ print( ) print( "Single user time:", - (dt_user_end - dt_user_begin ).microseconds / 1000, + (dt_user_end - dt_user_begin).microseconds / 1000, "msec" ) - -""" -print(pantsu.search(keyword='koe no katachi', - lang=["es", "ja"], category=[1, 3])) -""" diff --git a/tests/test_sukebei.py b/tests/test_sukebei.py new file mode 100644 index 0000000..5a2965a --- /dev/null +++ b/tests/test_sukebei.py @@ -0,0 +1,60 @@ +from NyaaPy import SukebeiNyaa +from pprint import pprint +from datetime import datetime +import json +import sys +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") + +nyaa = SukebeiNyaa() + +# Get fresh torrents +dt_latest_torrents_begin = datetime.now() +latest_torrents = nyaa.last_uploads(100) +dt_latest_torrents_end = datetime.now() +with open("test_files/sukebei_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) + +# Search some nasty stuff +dt_search_begin = datetime.now() +test_search = nyaa.search("G Senjou no maou") +dt_search_end = datetime.now() +with open("test_files/sukebei_search_test.json", 'w') as f: + json.dump(test_search, f) + +# Get first torrent from found torrents +dt_single_torrent_begin = datetime.now() +single_torrent = nyaa.get(test_search[0]["id"]) +dt_single_torrent_end = datetime.now() +with open("test_files/sukebei_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) + +dt_user_begin = datetime.now() +user_torrents = nyaa.get_user("RUNBKK") +dt_user_end = datetime.now() +with open("test_files/sukebei_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) + +print( + "Latest torrents time:", + (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, + "msec") +print( + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, + "msec" +) +print( + "Single torrent time:", + (dt_single_torrent_end - dt_single_torrent_begin).microseconds / 1000, + "msec" +) +print( + "Single user time:", + (dt_user_end - dt_user_begin).microseconds / 1000, + "msec" +) From fd28e65c8beb200f9c445435f722fee15e0e587c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Sat, 1 Feb 2020 15:11:35 +0100 Subject: [PATCH 06/10] Nyaa.si & sukebei.nyaa.si LXML fully ready --- .gitignore | 3 +- NyaaPy/nyaa.py | 32 ++++++++++----- NyaaPy/pantsu.py | 22 ++++++---- NyaaPy/sukebei.py | 43 +++++++++++--------- NyaaPy/utils.py | 95 +++++++++++++++++++------------------------ tests/test.py | 40 ++++++++++-------- tests/test_pantsu.py | 6 +++ tests/test_sukebei.py | 58 ++++++++++++++++++++++++++ 8 files changed, 190 insertions(+), 109 deletions(-) create mode 100644 tests/test_pantsu.py create mode 100644 tests/test_sukebei.py diff --git a/.gitignore b/.gitignore index 6f7eb31..39ed182 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ dist/ nyaapy.egg-info .vscode env/ -*.pyc \ No newline at end of file +*.pyc +test_files \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index c4ec563..8bd9614 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,25 +1,27 @@ import requests -import urllib.parse from NyaaPy import utils class Nyaa: def __init__(self): - self.URI = "http://nyaa.si" + self.SITE = utils.TorrentSite.NYAASI def last_uploads(self, number_of_results): - r = requests.get(self.URI) + r = requests.get(self.SITE.value) # If anything up with nyaa servers let the user know. r.raise_for_status() return utils.parse_nyaa( request_text=r.text, - limit=number_of_results + 1 + limit=number_of_results + 1, + site=self.SITE ) def search(self, keyword, **kwargs): + url = self.SITE.value + user = kwargs.get('user', None) category = kwargs.get('category', 0) subcategory = kwargs.get('subcategory', 0) @@ -33,24 +35,32 @@ class Nyaa: if page > 0: r = requests.get("{}/{}?f={}&c={}_{}&q={}&p={}".format( - self.URI, user_uri, filters, category, subcategory, keyword, + url, user_uri, filters, category, subcategory, keyword, page)) else: r = requests.get("{}/{}?f={}&c={}_{}&q={}".format( - self.URI, user_uri, filters, category, subcategory, keyword)) + url, user_uri, filters, category, subcategory, keyword)) r.raise_for_status() - return utils.parse_nyaa(request_text=r.text, limit=None) + return utils.parse_nyaa( + request_text=r.text, + limit=None, + site=self.SITE + ) def get(self, id): - r = requests.get("{}/view/{}".format(self.URI, id)) + r = requests.get("{}/view/{}".format(self.SITE.value, id)) r.raise_for_status() - return utils.parse_single(request_text=r.text) + return utils.parse_single(request_text=r.text, site=self.SITE) def get_user(self, username): - r = requests.get("{}/user/{}".format(self.URI, username)) + r = requests.get("{}/user/{}".format(self.SITE.value, username)) r.raise_for_status() - return utils.parse_nyaa(request_text=r.text, limit=None) + return utils.parse_nyaa( + request_text=r.text, + limit=None, + site=self.SITE + ) diff --git a/NyaaPy/pantsu.py b/NyaaPy/pantsu.py index f9bef3a..ef6f04f 100644 --- a/NyaaPy/pantsu.py +++ b/NyaaPy/pantsu.py @@ -1,18 +1,24 @@ import requests from NyaaPy import utils + class Pantsu: def __init__(self): self.BASE_URL = "https://nyaa.pantsu.cat/api" - + self.SITE = utils.TorrentSite.NYAANET + def last_uploads(self, number_of_results): - r = requests.get(self.URI) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=number_of_results + 1) + r = requests.get(self.SITE.value) + r.raise_for_status() + with open("test.html", "w") as f: + f.write(r.text) + return utils.parse_nyaa( + request_text=r.text, + limit=number_of_results + 1, + site=self.SITE + ) # Torrents - GET def search(self, keyword, **kwargs): @@ -23,10 +29,11 @@ class Pantsu: def view(self, item_id): request = requests.get("{}/view/{}".format(self.BASE_URL, item_id)) + request.raise_for_status() + return request.json() # Torrents - POST - def upload(self): return "Work in progress!" @@ -34,7 +41,6 @@ class Pantsu: return "Work in progress!" # Users - def login(self, username, password): login = requests.post("{}/login/".format( self.BASE_URL), data={'username': username, 'password': password}) diff --git a/NyaaPy/sukebei.py b/NyaaPy/sukebei.py index d0223c2..cab9f88 100644 --- a/NyaaPy/sukebei.py +++ b/NyaaPy/sukebei.py @@ -1,9 +1,14 @@ import requests -from bs4 import BeautifulSoup from NyaaPy import utils + class SukebeiNyaa: + + def __init__(self): + self.SITE = utils.TorrentSite.SUKEBEINYAASI + def search(self, keyword, **kwargs): + uri = self.SITE.value category = kwargs.get('category', 0) subcategory = kwargs.get('subcategory', 0) filters = kwargs.get('filters', 0) @@ -11,37 +16,37 @@ class SukebeiNyaa: if page > 0: r = requests.get("{}/?f={}&c={}_{}&q={}&p={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + uri, filters, category, subcategory, keyword, page)) else: r = requests.get("{}/?f={}&c={}_{}&q={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + uri, filters, category, subcategory, keyword)) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=None) + r.raise_for_status() + return utils.parse_nyaa(r.text, limit=None, site=self.SITE) def get(self, id): - r = requests.get("http://sukebei.nyaa.si/view/{}".format(id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) + r = requests.get("{}/view/{}".format(self.SITE.value, id)) + r.raise_for_status() - return utils.parse_single(content) + return utils.parse_single(r.text, self.SITE) def get_user(self, username): - r = requests.get("http://sukebei.nyaa.si/user/{}".format(username)) - soup = BeautifulSoup(r.text, 'html.parser') + r = requests.get("{}/user/{}".format(self.SITE.value, username)) + r.raise_for_status() - return utils.parse_nyaa(soup.select('table tr'), limit=None) + return utils.parse_nyaa(r.text, limit=None, site=self.SITE) - def news(self, number_of_results): - r = requests.get("http://sukebei.nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') + def last_uploads(self, number_of_results): + r = requests.get(self.SITE.value) + r.raise_for_status() - return utils.parse_sukebei(rows, limit=number_of_results + 1) + return utils.parse_nyaa( + r.text, + limit=number_of_results + 1, + site=self.SITE + ) class SukebeiPantsu: diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index 0c837b1..c6b0d71 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -3,9 +3,22 @@ ''' import re +from enum import Enum from lxml import etree +class TorrentSite(Enum): + """ + Contains torrent sites + """ + NYAASI = "https://nyaa.si" + SUKEBEINYAASI = "https://sukebei.nyaa.si" + + # * nyaa.pantsu.cat redirects to nyaa.net + NYAANET = "https://nyaa.net" + SUKEBEINYAANET = "https://sukebei.nyaa.net" + + def nyaa_categories(b): c = b.replace('?c=', '') cats = c.split('_') @@ -72,10 +85,13 @@ def nyaa_categories(b): return category_name -def parse_nyaa(request_text, limit): +def parse_nyaa(request_text, limit, site): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + # Put proper domain here. + uri = site.value + torrents = [] # Going through table rows @@ -94,25 +110,36 @@ def parse_nyaa(request_text, limit): if link.text and link.text.strip(): block.append(link.text.strip()) - if td.text and td.text.strip(): + if td.text is not None and td.text.strip(): block.append(td.text.strip()) # Add type of torrent based on tr class. - if 'danger' in tr.attrib.get("class"): - block.append("remake") - elif 'success' in tr.attrib.get("class"): - block.append("trusted") + if tr.attrib.get("class") is not None: + if 'danger' in tr.attrib.get("class"): + block.append("remake") + elif 'success' in tr.attrib.get("class"): + block.append("trusted") + else: + block.append("default") else: block.append("default") + # Decide category. + if site in [TorrentSite.NYAASI, TorrentSite.NYAANET]: + category = nyaa_categories(block[0]) + elif site in [TorrentSite.SUKEBEINYAASI, TorrentSite.SUKEBEINYAANET]: + category = sukebei_categories(block[0]) + else: + raise ArgumentException("Unknown TorrentSite received!") + # Create torrent object try: torrent = { 'id': block[1], - 'category': nyaa_categories(block[0]), - 'url': "https://nyaa.si/view/{}".format(block[1]), + 'category': category, + 'url': "{}/view/{}".format(uri, block[1]), 'name': block[2], - 'download_url': "https://nyaa.si/download/{}".format(block[3]), + 'download_url': "{}/download/{}".format(uri, block[3]), 'magnet': block[4], 'size': block[5], 'date': block[6], @@ -127,10 +154,13 @@ def parse_nyaa(request_text, limit): return torrents -def parse_single(request_text): +def parse_single(request_text, site): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + # Put proper domain here. + uri = site.value + torrent = {} data = [] torrent_files = [] @@ -152,7 +182,7 @@ def parse_single(request_text): tree.xpath("//h3[@class='panel-title']/text()")[0].strip() torrent['category'] = data[0] torrent['uploader'] = data[4] - torrent['uploader_profile'] = "http://nyaa.si/user/{}".format(data[4]) + torrent['uploader_profile'] = "{}/user/{}".format(uri, data[4]) torrent['website'] = data[6] torrent['size'] = data[8] torrent['date'] = data[3] @@ -169,49 +199,8 @@ def parse_single(request_text): return torrent -def parse_sukebei(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - block.append(link.text.rstrip()) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': sukebei_categories(block[0]), - 'url': "http://sukebei.nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://sukebei.nyaa.si{}".format( - block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - except IndexError as ie: - pass - - torrents.append(torrent) - - return torrents - - def sukebei_categories(b): - c = b.replace('/?c=', '') + c = b.replace('?c=', '') cats = c.split('_') cat = cats[0] diff --git a/tests/test.py b/tests/test.py index 2a5f5a4..8b9215e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,40 +1,51 @@ -from NyaaPy import Pantsu, Nyaa +from NyaaPy import Nyaa from pprint import pprint from datetime import datetime +import json +import sys +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") -# pantsu = Pantsu() nyaa = Nyaa() # Get fresh torrents dt_latest_torrents_begin = datetime.now() latest_torrents = nyaa.last_uploads(100) dt_latest_torrents_end = datetime.now() +with open("test_files/nyaa_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) -# I'd like to watch Tenki no ko, but not uploaded yet. +# Search some nasty stuff dt_search_begin = datetime.now() -test_search = nyaa.search("Kimi no Na wa") +test_search = nyaa.search("kimi no na wa") dt_search_end = datetime.now() -# pprint(test_search) +with open("test_files/nyaa_search_test.json", 'w') as f: + json.dump(test_search, f) # Get first torrent from found torrents -# print("First result torrent info:") dt_single_torrent_begin = datetime.now() single_torrent = nyaa.get(test_search[0]["id"]) dt_single_torrent_end = datetime.now() -#pprint(single_torrent) +with open("test_files/nyaa_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) dt_user_begin = datetime.now() -user_torrents = nyaa.get_user("Lilith-Raws") +user_torrents = nyaa.get_user("HorribleSubs") dt_user_end = datetime.now() -#pprint(user_torrents) +with open("test_files/nyaa_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) print( "Latest torrents time:", (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, "msec") print( - "Test search time:", - (dt_search_end - dt_search_begin).microseconds/ 1000, + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, "msec" ) print( @@ -44,11 +55,6 @@ print( ) print( "Single user time:", - (dt_user_end - dt_user_begin ).microseconds / 1000, + (dt_user_end - dt_user_begin).microseconds / 1000, "msec" ) - -""" -print(pantsu.search(keyword='koe no katachi', - lang=["es", "ja"], category=[1, 3])) -""" diff --git a/tests/test_pantsu.py b/tests/test_pantsu.py new file mode 100644 index 0000000..f77e593 --- /dev/null +++ b/tests/test_pantsu.py @@ -0,0 +1,6 @@ +""" +* Pantsu need some serious work +Regular data single_torrent parser not working from other Nyaa alternatives +Needs some work +""" +print("TODO") diff --git a/tests/test_sukebei.py b/tests/test_sukebei.py new file mode 100644 index 0000000..eebaaf6 --- /dev/null +++ b/tests/test_sukebei.py @@ -0,0 +1,58 @@ +from NyaaPy import SukebeiNyaa +from datetime import datetime +import json +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") + +nyaa = SukebeiNyaa() + +# Get fresh torrents +dt_latest_torrents_begin = datetime.now() +latest_torrents = nyaa.last_uploads(100) +dt_latest_torrents_end = datetime.now() +with open("test_files/sukebei_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) + +# Search some nasty stuff +dt_search_begin = datetime.now() +test_search = nyaa.search("G Senjou no maou") +dt_search_end = datetime.now() +with open("test_files/sukebei_search_test.json", 'w') as f: + json.dump(test_search, f) + +# Get first torrent from found torrents +dt_single_torrent_begin = datetime.now() +single_torrent = nyaa.get(test_search[0]["id"]) +dt_single_torrent_end = datetime.now() +with open("test_files/sukebei_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) + +dt_user_begin = datetime.now() +user_torrents = nyaa.get_user("RUNBKK") +dt_user_end = datetime.now() +with open("test_files/sukebei_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) + +print( + "Latest torrents time:", + (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, + "msec") +print( + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, + "msec" +) +print( + "Single torrent time:", + (dt_single_torrent_end - dt_single_torrent_begin).microseconds / 1000, + "msec" +) +print( + "Single user time:", + (dt_user_end - dt_user_begin).microseconds / 1000, + "msec" +) From cc565bf945fb1a883f256ffd4420e5abd7814595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Sat, 1 Feb 2020 17:06:51 +0100 Subject: [PATCH 07/10] Update nyaa.py --- NyaaPy/nyaa.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index e4ca1bb..7b9d4c0 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -5,11 +5,8 @@ from NyaaPy import utils class Nyaa: def __init__(self): -<<<<<<< HEAD self.SITE = utils.TorrentSite.NYAASI -======= self.URI = "https://nyaa.si" ->>>>>>> 5c93e516ba364d448335ebdc6989f8e1724c88c7 def last_uploads(self, number_of_results): r = requests.get(self.SITE.value) From 7a592c4b0506f8e03e94a87e4d13d6d941ac7b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20N=C3=A1n=C3=A1si?= Date: Sat, 1 Feb 2020 17:10:22 +0100 Subject: [PATCH 08/10] Some VSCode bug happened it made some conflicts --- tests/test_sukebei.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_sukebei.py b/tests/test_sukebei.py index e961c17..eebaaf6 100644 --- a/tests/test_sukebei.py +++ b/tests/test_sukebei.py @@ -1,13 +1,6 @@ from NyaaPy import SukebeiNyaa -<<<<<<< HEAD from datetime import datetime import json -======= -from pprint import pprint -from datetime import datetime -import json -import sys ->>>>>>> 5c93e516ba364d448335ebdc6989f8e1724c88c7 import os # Creating a folder for test_files From d3b2892c1f2a58ef49f50dfe0f4fa167082cd328 Mon Sep 17 00:00:00 2001 From: Juanjo Salvador Date: Sat, 5 Oct 2019 08:50:58 +0200 Subject: [PATCH 09/10] Update HOW-TO-CONTRIBUTE.md --- HOW-TO-CONTRIBUTE.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/HOW-TO-CONTRIBUTE.md b/HOW-TO-CONTRIBUTE.md index 0ea9ba9..15ff81b 100644 --- a/HOW-TO-CONTRIBUTE.md +++ b/HOW-TO-CONTRIBUTE.md @@ -4,8 +4,27 @@ 1. Star the repo, it will help me a lot. 2. Make a fork for you. -3. Use the `dev` branch, never master. +3. Clone the repo into your local machine. +4. Create a new branch for your changes. +5. Start hacking :-) +## Not familiarized with the Python workflow? + +1. Be sure that you have Python 3 and virtualenv installed (if not, install them) +2. Create a new virtualenv + +``` + python -m virtualenv env -p python3 +``` + +3. And activate it! +4. Now it's time to install the dependencies. + +``` + pip install -r requirements.txt +``` + +5. And now you're ready to hack. ## Hacking From 9db8af3a60dfd3a9aa569a632ff7bc96e11563ad Mon Sep 17 00:00:00 2001 From: Dolphin Date: Sun, 13 Dec 2020 21:12:56 +0100 Subject: [PATCH 10/10] Issue #30 Improve the torrent objects --- .gitignore | 4 +++- NyaaPy/__init__.py | 8 ++------ NyaaPy/nyaa.py | 29 ++++++++++++++++++----------- NyaaPy/torrent.py | 17 +++++++++++++++++ NyaaPy/utils.py | 37 ++++++++++++++++--------------------- tests/test.py | 31 +++++++++++++++++++++++++------ tests/test_sukebei.py | 2 +- 7 files changed, 82 insertions(+), 46 deletions(-) create mode 100644 NyaaPy/torrent.py diff --git a/.gitignore b/.gitignore index 39ed182..5648665 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ nyaapy.egg-info .vscode env/ *.pyc -test_files \ No newline at end of file +test_files +venv +.idea \ No newline at end of file diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index 1c2e563..0299c47 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -1,11 +1,7 @@ # Info about the module -__version__ = '0.6.0' +__version__ = '0.6.3' __author__ = 'Juanjo Salvador' __email__ = 'juanjosalvador@netc.eu' __url__ = 'http://juanjosalvador.me' __copyright__ = '2017 Juanjo Salvador' -__license__ = 'MIT license' - -from NyaaPy.nyaa import Nyaa -from NyaaPy.pantsu import Pantsu -from NyaaPy.sukebei import SukebeiNyaa, SukebeiPantsu +__license__ = 'MIT license' \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index 7b9d4c0..cbd93ff 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,27 +1,29 @@ import requests from NyaaPy import utils +from NyaaPy import torrent class Nyaa: def __init__(self): self.SITE = utils.TorrentSite.NYAASI - self.URI = "https://nyaa.si" + self.URL = "https://nyaa.si" def last_uploads(self, number_of_results): - r = requests.get(self.SITE.value) + r = requests.get(self.URL) # If anything up with nyaa servers let the user know. r.raise_for_status() - return utils.parse_nyaa( + json_data = utils.parse_nyaa( request_text=r.text, limit=number_of_results + 1, site=self.SITE ) + return torrent.json_to_class(json_data) def search(self, keyword, **kwargs): - url = self.SITE.value + url = self.URL user = kwargs.get('user', None) category = kwargs.get('category', 0) @@ -30,7 +32,7 @@ class Nyaa: page = kwargs.get('page', 0) if user: - user_uri = "user/{}".format(user) + user_uri = f"user/{user}" else: user_uri = "" @@ -44,24 +46,29 @@ class Nyaa: r.raise_for_status() - return utils.parse_nyaa( + json_data = utils.parse_nyaa( request_text=r.text, limit=None, site=self.SITE ) - def get(self, id): - r = requests.get("{}/view/{}".format(self.SITE.value, id)) + return torrent.json_to_class(json_data) + + def get(self, view_id): + r = requests.get(f'{self.URL}/view/{view_id}') r.raise_for_status() - return utils.parse_single(request_text=r.text, site=self.SITE) + json_data = utils.parse_single(request_text=r.text, site=self.SITE) + + return torrent.json_to_class(json_data) def get_user(self, username): - r = requests.get("{}/user/{}".format(self.SITE.value, username)) + r = requests.get(f'{self.URL}/user/{username}') r.raise_for_status() - return utils.parse_nyaa( + json_data = utils.parse_nyaa( request_text=r.text, limit=None, site=self.SITE ) + return torrent.json_to_class(json_data) diff --git a/NyaaPy/torrent.py b/NyaaPy/torrent.py new file mode 100644 index 0000000..8269c05 --- /dev/null +++ b/NyaaPy/torrent.py @@ -0,0 +1,17 @@ +def json_to_class(data): + # We check if the data passed is a list or not + if isinstance(data, list): + object_list = [] + for item in data: + object_list.append(Torrent(item)) + # Return a list of Torrent objects + return object_list + else: + return Torrent(data) + + +# This deals with converting the dict to an object +class Torrent(object): + def __init__(self, my_dict): + for key in my_dict: + setattr(self, key, my_dict[key]) diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index c6b0d71..83c35c7 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -1,8 +1,3 @@ -''' - Module utils -''' - -import re from enum import Enum from lxml import etree @@ -24,12 +19,12 @@ def nyaa_categories(b): cats = c.split('_') cat = cats[0] - subcat = cats[1] + sub_cat = cats[1] categories = { "1": { "name": "Anime", - "subcats": { + "sub_cats": { "1": "Anime Music Video", "2": "English-translated", "3": "Non-English-translated", @@ -38,14 +33,14 @@ def nyaa_categories(b): }, "2": { "name": "Audio", - "subcats": { + "sub_cats": { "1": "Lossless", "2": "Lossy" } }, "3": { "name": "Literature", - "subcats": { + "sub_cats": { "1": "English-translated", "2": "Non-English-translated", "3": "Raw" @@ -53,7 +48,7 @@ def nyaa_categories(b): }, "4": { "name": "Live Action", - "subcats": { + "sub_cats": { "1": "English-translated", "2": "Idol/Promotional Video", "3": "Non-English-translated", @@ -62,14 +57,14 @@ def nyaa_categories(b): }, "5": { "name": "Pictures", - "subcats": { + "sub_cats": { "1": "Graphics", "2": "Photos" } }, "6": { "name": "Software", - "subcats": { + "sub_cats": { "1": "Applications", "2": "Games" } @@ -77,10 +72,10 @@ def nyaa_categories(b): } try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass + category_name = f"{categories[cat]['name']} - {categories[cat]['sub_cats'][sub_cat]}" + except KeyError: + print("Unable to get Nyaa category name") + return return category_name @@ -130,7 +125,7 @@ def parse_nyaa(request_text, limit, site): elif site in [TorrentSite.SUKEBEINYAASI, TorrentSite.SUKEBEINYAANET]: category = sukebei_categories(block[0]) else: - raise ArgumentException("Unknown TorrentSite received!") + raise ValueError("Unknown TorrentSite received!") # Create torrent object try: @@ -227,10 +222,10 @@ def sukebei_categories(b): } try: - category_name = "{} - {}".format( - categories[cat]['name'], categories[cat]['subcats'][subcat]) - except Exception: - pass + category_name = f"{categories[cat]['name']} - {categories[cat]['subcats'][subcat]}" + except KeyError: + print("Unable to get Sukebei category name") + return return category_name diff --git a/tests/test.py b/tests/test.py index 8b9215e..10f67fd 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,4 +1,4 @@ -from NyaaPy import Nyaa +from NyaaPy.nyaa import Nyaa from pprint import pprint from datetime import datetime import json @@ -17,27 +17,46 @@ dt_latest_torrents_begin = datetime.now() latest_torrents = nyaa.last_uploads(100) dt_latest_torrents_end = datetime.now() with open("test_files/nyaa_latest_torrent_test.json", 'w') as f: - json.dump(latest_torrents, f) + for torrent in latest_torrents: + try: + # This prints it as byte like objects since unicode is fun + f.write(str(torrent.name.encode('utf-8')) + '\n') + except AttributeError: + f.write('No name found for this torrent') # Search some nasty stuff dt_search_begin = datetime.now() test_search = nyaa.search("kimi no na wa") dt_search_end = datetime.now() with open("test_files/nyaa_search_test.json", 'w') as f: - json.dump(test_search, f) + for torrent in test_search: + try: + # This prints it as byte like objects since unicode is fun + f.write(str(torrent.name.encode('utf-8')) + '\n') + except AttributeError: + f.write('No name found for this torrent') # Get first torrent from found torrents dt_single_torrent_begin = datetime.now() -single_torrent = nyaa.get(test_search[0]["id"]) +single_torrent = test_search[0] dt_single_torrent_end = datetime.now() with open("test_files/nyaa_single_torrent_test.json", 'w') as f: - json.dump(single_torrent, f) + try: + # This prints it as byte like objects since unicode is fun + f.write(str(torrent.name.encode('utf-8')) + '\n') + except AttributeError: + f.write('No name found for this torrent') dt_user_begin = datetime.now() user_torrents = nyaa.get_user("HorribleSubs") dt_user_end = datetime.now() with open("test_files/nyaa_single_user_test.json", 'w') as f: - json.dump(user_torrents, f) + for torrent in user_torrents: + try: + # This prints it as byte like objects since unicode is fun + f.write(str(torrent.name.encode('utf-8')) + '\n') + except AttributeError: + f.write('No name found for this torrent') print( "Latest torrents time:", diff --git a/tests/test_sukebei.py b/tests/test_sukebei.py index eebaaf6..a6ef1cb 100644 --- a/tests/test_sukebei.py +++ b/tests/test_sukebei.py @@ -1,4 +1,4 @@ -from NyaaPy import SukebeiNyaa +from NyaaPy.sukebei import SukebeiNyaa from datetime import datetime import json import os