From f88ff7ad56778a05a15690467e045e735e8ff5b4 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 20:37:46 +0200 Subject: [PATCH] refactoring and improvements (fixed #7) --- NyaaPy/__init__.py | 167 +------------------------------------------ NyaaPy/nyaa.py | 33 +++++++++ NyaaPy/nyaapantsu.py | 42 +++++++++++ NyaaPy/utils.py | 107 +++++++++++++++++++++++++++ tests/test.py | 12 ++-- 5 files changed, 189 insertions(+), 172 deletions(-) create mode 100644 NyaaPy/nyaa.py create mode 100644 NyaaPy/nyaapantsu.py create mode 100644 NyaaPy/utils.py diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index 3899790..9dd667f 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -1,6 +1,3 @@ -import requests -from bs4 import BeautifulSoup - # Info about the module __version__ = '0.4.1' __author__ = 'Juanjo Salvador' @@ -9,165 +6,5 @@ __url__ = 'http://juanjosalvador.me' __copyright__ = '2017 Juanjo Salvador' __license__ = 'MIT license' -class Nyaa(): - ''' - Return a list of dicts with the results of the query. - ''' - def search(keyword, category, subcategory, filters, page): - if page > 0: - r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) - else: - r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) - - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - torrents = [] - - if rows: - for row in rows: - block = [] - - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'category': get_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - - torrents.append(torrent) - except IndexError as ie: - pass - - return torrents - - ''' - Returns an array of dicts with the n last updates of Nyaa.si - ''' - def news(n): - r = requests.get("http://nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - torrents = [] - - for row in rows: - block = [] - - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'category': get_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - - torrents.append(torrent) - except IndexError: - pass - - - return torrents[:n] - -class NyaaPantsu(): - ''' - Make a query to nyaa.pantsu.cat using keyword as keyword. - Returns an array of OrderedDict with every result of the query. - Returns an empty array if no results. - ''' - def search(keyword): - nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results - - ''' - Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat - ''' - def news(n): - nyaa_baseurl = "https://nyaa.pantsu.cat/feed" - - request = requests.get(nyaa_baseurl) - response = xmltodict.parse(request.text) - - results = response['rss']['channel']['item'] - - return results[:n] - -# Auxiliar functions - -def get_categories(b): - c = b.replace('/?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Anime", - "subcats": { - "1": "test", - "2": "test", - "3": "test", - "4": "test", - "5": "test", - "6": "test", - "7": "test" - } - }, - "2": "Audio", - "3": "Literature", - "4": "Live Action", - "5": "Pictures", - "6": "Software" - } - return "{} - {}".format() \ No newline at end of file +from NyaaPy.nyaa import Nyaa +from NyaaPy.nyaapantsu import NyaaPantsu \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py new file mode 100644 index 0000000..c7eab47 --- /dev/null +++ b/NyaaPy/nyaa.py @@ -0,0 +1,33 @@ +import requests +from bs4 import BeautifulSoup +from NyaaPy.utils import Utils as utils + +class Nyaa(): + ''' + Return a list of dicts with the results of the query. + ''' + def search(keyword, category, subcategory, filters, page): + if page > 0: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) + else: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) + + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + results = {} + + if rows: + results = utils.parse_nyaa(rows, limit=None) + + return results + + ''' + Returns an array of dicts with the n last updates of Nyaa.si + ''' + def news(number_of_results): + r = requests.get("http://nyaa.si/") + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_nyaa(rows, limit=number_of_results) \ No newline at end of file diff --git a/NyaaPy/nyaapantsu.py b/NyaaPy/nyaapantsu.py new file mode 100644 index 0000000..699bada --- /dev/null +++ b/NyaaPy/nyaapantsu.py @@ -0,0 +1,42 @@ +import requests +from bs4 import BeautifulSoup +from NyaaPy.utils import Utils as utils + + +class NyaaPantsu(): + ''' + Make a query to nyaa.pantsu.cat using keyword as keyword. + Returns an array of OrderedDict with every result of the query. + Returns an empty array if no results. + ''' + def search(keyword): + nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" + + request = requests.get(nyaa_baseurl + keyword) + response = xmltodict.parse(request.text) + + results = [] + + try: + if type(response['rss']['channel']['item']) is collections.OrderedDict: + results.append(response['rss']['channel']['item']) + else: + results = response['rss']['channel']['item'] + + except KeyError as ex: + results = [] + + return results + + ''' + Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat + ''' + def news(n): + nyaa_baseurl = "https://nyaa.pantsu.cat/feed" + + request = requests.get(nyaa_baseurl) + response = xmltodict.parse(request.text) + + results = response['rss']['channel']['item'] + + return results[:n] \ No newline at end of file diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py new file mode 100644 index 0000000..253e9e0 --- /dev/null +++ b/NyaaPy/utils.py @@ -0,0 +1,107 @@ +''' + Module utils +''' + +class Utils(): + + def get_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Anime", + "subcats": { + "1": "Anime Music Video", + "2": "English-translated", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "2": { + "name": "Audio", + "subcats": { + "1": "Lossless", + "2": "Lossy" + } + }, + "3": { + "name": "Literature", + "subcats": { + "1": "English-translated", + "2": "Non-English-translated", + "3": "Raw" + } + }, + "4": { + "name": "Live Action", + "subcats": { + "1": "English-translated", + "2": "Idol/Promotional Video", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "5": { + "name": "Pictures", + "subcats": { + "1": "Graphics", + "2": "Photos" + } + }, + "6": { + "name": "Software", + "subcats": { + "1": "Applications", + "2": "Games" + } + } + } + + try: + category_name = "{} - {}".format(categories[cat]['name'], categories[cat]['subcats'][subcat]) + except: + pass + + return category_name + + def parse_nyaa(table_rows, limit): + + torrents = [] + + for row in table_rows[:limit]: + block = [] + + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'category': Utils.get_categories(block[0]), + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + + torrents.append(torrent) + except IndexError as ie: + pass + + return torrents \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index 96c746f..c170112 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,18 +1,16 @@ -import json, requests from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results def nyaa_search(): - nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0) + nyaa_query = Nyaa.search(keyword='koe no katachi 1080', category=1, subcategory=0, filters=0, page=0) for nyaa in nyaa_query: - print(nyaa['date']) + print(nyaa) def nyaa_news(): - news = Nyaa.news(5) - - for result in news: - print(result) + news = Nyaa.news(number_of_results=5) + for n in news: + print(n) # Nyaa.pantsu.cat results def pantsu_search():