diff --git a/.gitignore b/.gitignore index 6f7eb31..39ed182 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ dist/ nyaapy.egg-info .vscode env/ -*.pyc \ No newline at end of file +*.pyc +test_files \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py index c4ec563..8bd9614 100644 --- a/NyaaPy/nyaa.py +++ b/NyaaPy/nyaa.py @@ -1,25 +1,27 @@ import requests -import urllib.parse from NyaaPy import utils class Nyaa: def __init__(self): - self.URI = "http://nyaa.si" + self.SITE = utils.TorrentSite.NYAASI def last_uploads(self, number_of_results): - r = requests.get(self.URI) + r = requests.get(self.SITE.value) # If anything up with nyaa servers let the user know. r.raise_for_status() return utils.parse_nyaa( request_text=r.text, - limit=number_of_results + 1 + limit=number_of_results + 1, + site=self.SITE ) def search(self, keyword, **kwargs): + url = self.SITE.value + user = kwargs.get('user', None) category = kwargs.get('category', 0) subcategory = kwargs.get('subcategory', 0) @@ -33,24 +35,32 @@ class Nyaa: if page > 0: r = requests.get("{}/{}?f={}&c={}_{}&q={}&p={}".format( - self.URI, user_uri, filters, category, subcategory, keyword, + url, user_uri, filters, category, subcategory, keyword, page)) else: r = requests.get("{}/{}?f={}&c={}_{}&q={}".format( - self.URI, user_uri, filters, category, subcategory, keyword)) + url, user_uri, filters, category, subcategory, keyword)) r.raise_for_status() - return utils.parse_nyaa(request_text=r.text, limit=None) + return utils.parse_nyaa( + request_text=r.text, + limit=None, + site=self.SITE + ) def get(self, id): - r = requests.get("{}/view/{}".format(self.URI, id)) + r = requests.get("{}/view/{}".format(self.SITE.value, id)) r.raise_for_status() - return utils.parse_single(request_text=r.text) + return utils.parse_single(request_text=r.text, site=self.SITE) def get_user(self, username): - r = requests.get("{}/user/{}".format(self.URI, username)) + r = requests.get("{}/user/{}".format(self.SITE.value, username)) r.raise_for_status() - return utils.parse_nyaa(request_text=r.text, limit=None) + return utils.parse_nyaa( + request_text=r.text, + limit=None, + site=self.SITE + ) diff --git a/NyaaPy/pantsu.py b/NyaaPy/pantsu.py index f9bef3a..ef6f04f 100644 --- a/NyaaPy/pantsu.py +++ b/NyaaPy/pantsu.py @@ -1,18 +1,24 @@ import requests from NyaaPy import utils + class Pantsu: def __init__(self): self.BASE_URL = "https://nyaa.pantsu.cat/api" - + self.SITE = utils.TorrentSite.NYAANET + def last_uploads(self, number_of_results): - r = requests.get(self.URI) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=number_of_results + 1) + r = requests.get(self.SITE.value) + r.raise_for_status() + with open("test.html", "w") as f: + f.write(r.text) + return utils.parse_nyaa( + request_text=r.text, + limit=number_of_results + 1, + site=self.SITE + ) # Torrents - GET def search(self, keyword, **kwargs): @@ -23,10 +29,11 @@ class Pantsu: def view(self, item_id): request = requests.get("{}/view/{}".format(self.BASE_URL, item_id)) + request.raise_for_status() + return request.json() # Torrents - POST - def upload(self): return "Work in progress!" @@ -34,7 +41,6 @@ class Pantsu: return "Work in progress!" # Users - def login(self, username, password): login = requests.post("{}/login/".format( self.BASE_URL), data={'username': username, 'password': password}) diff --git a/NyaaPy/sukebei.py b/NyaaPy/sukebei.py index d0223c2..cab9f88 100644 --- a/NyaaPy/sukebei.py +++ b/NyaaPy/sukebei.py @@ -1,9 +1,14 @@ import requests -from bs4 import BeautifulSoup from NyaaPy import utils + class SukebeiNyaa: + + def __init__(self): + self.SITE = utils.TorrentSite.SUKEBEINYAASI + def search(self, keyword, **kwargs): + uri = self.SITE.value category = kwargs.get('category', 0) subcategory = kwargs.get('subcategory', 0) filters = kwargs.get('filters', 0) @@ -11,37 +16,37 @@ class SukebeiNyaa: if page > 0: r = requests.get("{}/?f={}&c={}_{}&q={}&p={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + uri, filters, category, subcategory, keyword, page)) else: r = requests.get("{}/?f={}&c={}_{}&q={}".format( - "http://sukebei.nyaa.si", filters, category, subcategory, + uri, filters, category, subcategory, keyword)) - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - return utils.parse_nyaa(rows, limit=None) + r.raise_for_status() + return utils.parse_nyaa(r.text, limit=None, site=self.SITE) def get(self, id): - r = requests.get("http://sukebei.nyaa.si/view/{}".format(id)) - soup = BeautifulSoup(r.text, 'html.parser') - content = soup.findAll("div", {"class": "panel", "id": None}) + r = requests.get("{}/view/{}".format(self.SITE.value, id)) + r.raise_for_status() - return utils.parse_single(content) + return utils.parse_single(r.text, self.SITE) def get_user(self, username): - r = requests.get("http://sukebei.nyaa.si/user/{}".format(username)) - soup = BeautifulSoup(r.text, 'html.parser') + r = requests.get("{}/user/{}".format(self.SITE.value, username)) + r.raise_for_status() - return utils.parse_nyaa(soup.select('table tr'), limit=None) + return utils.parse_nyaa(r.text, limit=None, site=self.SITE) - def news(self, number_of_results): - r = requests.get("http://sukebei.nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') + def last_uploads(self, number_of_results): + r = requests.get(self.SITE.value) + r.raise_for_status() - return utils.parse_sukebei(rows, limit=number_of_results + 1) + return utils.parse_nyaa( + r.text, + limit=number_of_results + 1, + site=self.SITE + ) class SukebeiPantsu: diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py index 0c837b1..c6b0d71 100644 --- a/NyaaPy/utils.py +++ b/NyaaPy/utils.py @@ -3,9 +3,22 @@ ''' import re +from enum import Enum from lxml import etree +class TorrentSite(Enum): + """ + Contains torrent sites + """ + NYAASI = "https://nyaa.si" + SUKEBEINYAASI = "https://sukebei.nyaa.si" + + # * nyaa.pantsu.cat redirects to nyaa.net + NYAANET = "https://nyaa.net" + SUKEBEINYAANET = "https://sukebei.nyaa.net" + + def nyaa_categories(b): c = b.replace('?c=', '') cats = c.split('_') @@ -72,10 +85,13 @@ def nyaa_categories(b): return category_name -def parse_nyaa(request_text, limit): +def parse_nyaa(request_text, limit, site): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + # Put proper domain here. + uri = site.value + torrents = [] # Going through table rows @@ -94,25 +110,36 @@ def parse_nyaa(request_text, limit): if link.text and link.text.strip(): block.append(link.text.strip()) - if td.text and td.text.strip(): + if td.text is not None and td.text.strip(): block.append(td.text.strip()) # Add type of torrent based on tr class. - if 'danger' in tr.attrib.get("class"): - block.append("remake") - elif 'success' in tr.attrib.get("class"): - block.append("trusted") + if tr.attrib.get("class") is not None: + if 'danger' in tr.attrib.get("class"): + block.append("remake") + elif 'success' in tr.attrib.get("class"): + block.append("trusted") + else: + block.append("default") else: block.append("default") + # Decide category. + if site in [TorrentSite.NYAASI, TorrentSite.NYAANET]: + category = nyaa_categories(block[0]) + elif site in [TorrentSite.SUKEBEINYAASI, TorrentSite.SUKEBEINYAANET]: + category = sukebei_categories(block[0]) + else: + raise ArgumentException("Unknown TorrentSite received!") + # Create torrent object try: torrent = { 'id': block[1], - 'category': nyaa_categories(block[0]), - 'url': "https://nyaa.si/view/{}".format(block[1]), + 'category': category, + 'url': "{}/view/{}".format(uri, block[1]), 'name': block[2], - 'download_url': "https://nyaa.si/download/{}".format(block[3]), + 'download_url': "{}/download/{}".format(uri, block[3]), 'magnet': block[4], 'size': block[5], 'date': block[6], @@ -127,10 +154,13 @@ def parse_nyaa(request_text, limit): return torrents -def parse_single(request_text): +def parse_single(request_text, site): parser = etree.HTMLParser() tree = etree.fromstring(request_text, parser) + # Put proper domain here. + uri = site.value + torrent = {} data = [] torrent_files = [] @@ -152,7 +182,7 @@ def parse_single(request_text): tree.xpath("//h3[@class='panel-title']/text()")[0].strip() torrent['category'] = data[0] torrent['uploader'] = data[4] - torrent['uploader_profile'] = "http://nyaa.si/user/{}".format(data[4]) + torrent['uploader_profile'] = "{}/user/{}".format(uri, data[4]) torrent['website'] = data[6] torrent['size'] = data[8] torrent['date'] = data[3] @@ -169,49 +199,8 @@ def parse_single(request_text): return torrent -def parse_sukebei(table_rows, limit): - if limit == 0: - limit = len(table_rows) - - torrents = [] - - for row in table_rows[:limit]: - block = [] - - for td in row.find_all('td'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - block.append(link.text.rstrip()) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'id': block[1].replace("/view/", ""), - 'category': sukebei_categories(block[0]), - 'url': "http://sukebei.nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://sukebei.nyaa.si{}".format( - block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - except IndexError as ie: - pass - - torrents.append(torrent) - - return torrents - - def sukebei_categories(b): - c = b.replace('/?c=', '') + c = b.replace('?c=', '') cats = c.split('_') cat = cats[0] diff --git a/tests/test.py b/tests/test.py index 2a5f5a4..8b9215e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,40 +1,51 @@ -from NyaaPy import Pantsu, Nyaa +from NyaaPy import Nyaa from pprint import pprint from datetime import datetime +import json +import sys +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") -# pantsu = Pantsu() nyaa = Nyaa() # Get fresh torrents dt_latest_torrents_begin = datetime.now() latest_torrents = nyaa.last_uploads(100) dt_latest_torrents_end = datetime.now() +with open("test_files/nyaa_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) -# I'd like to watch Tenki no ko, but not uploaded yet. +# Search some nasty stuff dt_search_begin = datetime.now() -test_search = nyaa.search("Kimi no Na wa") +test_search = nyaa.search("kimi no na wa") dt_search_end = datetime.now() -# pprint(test_search) +with open("test_files/nyaa_search_test.json", 'w') as f: + json.dump(test_search, f) # Get first torrent from found torrents -# print("First result torrent info:") dt_single_torrent_begin = datetime.now() single_torrent = nyaa.get(test_search[0]["id"]) dt_single_torrent_end = datetime.now() -#pprint(single_torrent) +with open("test_files/nyaa_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) dt_user_begin = datetime.now() -user_torrents = nyaa.get_user("Lilith-Raws") +user_torrents = nyaa.get_user("HorribleSubs") dt_user_end = datetime.now() -#pprint(user_torrents) +with open("test_files/nyaa_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) print( "Latest torrents time:", (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, "msec") print( - "Test search time:", - (dt_search_end - dt_search_begin).microseconds/ 1000, + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, "msec" ) print( @@ -44,11 +55,6 @@ print( ) print( "Single user time:", - (dt_user_end - dt_user_begin ).microseconds / 1000, + (dt_user_end - dt_user_begin).microseconds / 1000, "msec" ) - -""" -print(pantsu.search(keyword='koe no katachi', - lang=["es", "ja"], category=[1, 3])) -""" diff --git a/tests/test_pantsu.py b/tests/test_pantsu.py new file mode 100644 index 0000000..f77e593 --- /dev/null +++ b/tests/test_pantsu.py @@ -0,0 +1,6 @@ +""" +* Pantsu need some serious work +Regular data single_torrent parser not working from other Nyaa alternatives +Needs some work +""" +print("TODO") diff --git a/tests/test_sukebei.py b/tests/test_sukebei.py new file mode 100644 index 0000000..eebaaf6 --- /dev/null +++ b/tests/test_sukebei.py @@ -0,0 +1,58 @@ +from NyaaPy import SukebeiNyaa +from datetime import datetime +import json +import os + +# Creating a folder for test_files +# ! not included in github project. +if not os.path.isdir("test_files"): + os.makedirs("test_files") + +nyaa = SukebeiNyaa() + +# Get fresh torrents +dt_latest_torrents_begin = datetime.now() +latest_torrents = nyaa.last_uploads(100) +dt_latest_torrents_end = datetime.now() +with open("test_files/sukebei_latest_torrent_test.json", 'w') as f: + json.dump(latest_torrents, f) + +# Search some nasty stuff +dt_search_begin = datetime.now() +test_search = nyaa.search("G Senjou no maou") +dt_search_end = datetime.now() +with open("test_files/sukebei_search_test.json", 'w') as f: + json.dump(test_search, f) + +# Get first torrent from found torrents +dt_single_torrent_begin = datetime.now() +single_torrent = nyaa.get(test_search[0]["id"]) +dt_single_torrent_end = datetime.now() +with open("test_files/sukebei_single_torrent_test.json", 'w') as f: + json.dump(single_torrent, f) + +dt_user_begin = datetime.now() +user_torrents = nyaa.get_user("RUNBKK") +dt_user_end = datetime.now() +with open("test_files/sukebei_single_user_test.json", 'w') as f: + json.dump(user_torrents, f) + +print( + "Latest torrents time:", + (dt_latest_torrents_end - dt_latest_torrents_begin).microseconds / 1000, + "msec") +print( + "Test search time:", + (dt_search_end - dt_search_begin).microseconds / 1000, + "msec" +) +print( + "Single torrent time:", + (dt_single_torrent_end - dt_single_torrent_begin).microseconds / 1000, + "msec" +) +print( + "Single user time:", + (dt_user_end - dt_user_begin).microseconds / 1000, + "msec" +)