From 1c5c85e9d97f93543110822c0c16a7f9f60d366e Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Sun, 13 Aug 2017 17:02:15 +0200 Subject: [PATCH 01/13] development version --- NyaaPy/__init__.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++ NyaaPy/nyaa.py | 44 ----------------------- setup.py | 2 +- src/nyaa.py | 54 ---------------------------- tests/test.py | 45 +++++++++++++++++------- 5 files changed, 122 insertions(+), 111 deletions(-) delete mode 100644 NyaaPy/nyaa.py delete mode 100644 src/nyaa.py diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index e69de29..ba26252 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -0,0 +1,88 @@ +import requests +import xmltodict +import json +import collections + +# Info about the module +__version__ = '0.4' +__author__ = 'Juanjo Salvador' +__email__ = 'juanjosalvador@netc.eu' +__url__ = 'http://juanjosalvador.me' +__copyright__ = '2017 Juanjo Salvador' +__license__ = 'MIT license' + +class Nyaa(): + ''' + Make a query to nyaa.si using keyword as keyword. + Returns an array of OrderedDict with every result of the query. + Returns an empty array if no results. + ''' + def search(keyword): + nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q=" + + request = requests.get(nyaa_baseurl + keyword) + response = xmltodict.parse(request.text) + + results = [] + + try: + if type(response['rss']['channel']['item']) is collections.OrderedDict: + results.append(response['rss']['channel']['item']) + else: + results = response['rss']['channel']['item'] + + except KeyError as ex: + results = [] + + return results + + ''' + Returns an array of OrderedDict with the n last updates of Nyaa.si + ''' + def news(n): + nyaa_baseurl = "https://nyaa.si/?page=rss" + + request = requests.get(nyaa_baseurl) + response = xmltodict.parse(request.text) + + results = response['rss']['channel']['item'] + + return results[:n] + +class NyaaPantsu(): + ''' + Make a query to nyaa.pantsu.cat using keyword as keyword. + Returns an array of OrderedDict with every result of the query. + Returns an empty array if no results. + ''' + def search(keyword): + nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" + + request = requests.get(nyaa_baseurl + keyword) + response = xmltodict.parse(request.text) + + results = [] + + try: + if type(response['rss']['channel']['item']) is collections.OrderedDict: + results.append(response['rss']['channel']['item']) + else: + results = response['rss']['channel']['item'] + + except KeyError as ex: + results = [] + + return results + + ''' + Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat + ''' + def news(n): + nyaa_baseurl = "https://nyaa.pantsu.cat/feed" + + request = requests.get(nyaa_baseurl) + response = xmltodict.parse(request.text) + + results = response['rss']['channel']['item'] + + return results[:n] diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py deleted file mode 100644 index f27f708..0000000 --- a/NyaaPy/nyaa.py +++ /dev/null @@ -1,44 +0,0 @@ -import requests -import xmltodict -import json -import collections - -class Nyaa(): - def search(keyword): - nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results - -class NyaaPantsu(): - def search(keyword): - nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results diff --git a/setup.py b/setup.py index 290f21a..0a7998d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup(name='nyaapy', - version='0.3.1', + version='0.4', url='https://github.com/juanjosalvador/nyaapy', download_url = 'https://github.com/juanjosalvador/nyaapy/archive/0.1.tar.gz', license='MIT', diff --git a/src/nyaa.py b/src/nyaa.py deleted file mode 100644 index 9f1454d..0000000 --- a/src/nyaa.py +++ /dev/null @@ -1,54 +0,0 @@ -import requests -import xmltodict -import json -import collections - -class Nyaa: - ''' - Makes a search query to nyaa.si with the given keyword that returns a - RSS file converted into a dictionary that we can use. - ''' - - def search(keyword): - nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results - -class NyaaPantsu: - ''' - Makes a search query to nyaa.pantsu.cat with the given keyword that returns a - RSS file converted into a dictionary that we can use. - ''' - - def search(keyword): - nyaa_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results diff --git a/tests/test.py b/tests/test.py index ae4202d..b323660 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,20 +1,41 @@ -from NyaaPy.nyaa import Nyaa -from NyaaPy.nyaa import NyaaPantsu import json +from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results -nyaa_query = Nyaa.search('koe no katachi 1080') +def nyaa_search(): + nyaa_query = Nyaa.search('koe no katachi 1080') -if len(nyaa_query) > 0: - for result in nyaa_query: + if len(nyaa_query) > 0: + for result in nyaa_query: + print(result['title']) + else: + print('Nothing here!') + +def nyaa_news(): + news = Nyaa.news(5) + + for result in news: print(result['title']) -else: - print('Nothing here!') # Nyaa.pantsu.cat results -pantsu_query = NyaaPantsu.search('new game!!') -if len(pantsu_query) > 0: - for result in pantsu_query: +def pantsu_search(): + pantsu_query = NyaaPantsu.search('new game!!') + if len(pantsu_query) > 0: + for result in pantsu_query: + print(result['title']) + else: + print('Nothing here!') + + +def pantsu_news(): + news = NyaaPantsu.news(5) + + for result in news: print(result['title']) -else: - print('Nothing here!') + +# Uncomment whatever you want to test + +#nyaa_search() +#pantsu_search() +#nyaa_news() +pantsu_news() From c18bdb736bdfa56e7ba102e84e0a7787b90edbdc Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Tue, 10 Oct 2017 20:24:32 +0200 Subject: [PATCH 02/13] Fixed .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f72cc34..9e4ea62 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ build/ dist/ nyaapy.egg-info +.vscode \ No newline at end of file From 5cb207918814c9ea69163711967e55c2860f9686 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Wed, 11 Oct 2017 00:04:10 +0200 Subject: [PATCH 03/13] Web Scraping WIP, added category, subcategory and filters --- NyaaPy/__init__.py | 43 +++++++++++++++++++++++-------------------- setup.py | 2 +- tests/test.py | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index ba26252..b0b3dbf 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -1,7 +1,6 @@ import requests -import xmltodict import json -import collections +from bs4 import BeautifulSoup # Info about the module __version__ = '0.4' @@ -12,29 +11,33 @@ __copyright__ = '2017 Juanjo Salvador' __license__ = 'MIT license' class Nyaa(): - ''' - Make a query to nyaa.si using keyword as keyword. - Returns an array of OrderedDict with every result of the query. - Returns an empty array if no results. - ''' - def search(keyword): - nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q=" + def search(keyword, category, subcategory, filters): + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) + torrents = [] - results = [] + for row in rows: + td = row.find_all('td') + torrent = [] - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] + for i in td: + if i.find('a'): + torrent.append(i.find('a').get('href')) + text = i.text.rstrip() + if len(text) > 0: + torrent.append(text) + else: + text = i.text.rstrip() + if len(text) > 0: + torrent.append(text) - except KeyError as ex: - results = [] + torrents.append(torrent) + + print(torrents) - return results + return torrents ''' Returns an array of OrderedDict with the n last updates of Nyaa.si diff --git a/setup.py b/setup.py index 0a7998d..8a36b5b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup(name='nyaapy', - version='0.4', + version='0.4.1', url='https://github.com/juanjosalvador/nyaapy', download_url = 'https://github.com/juanjosalvador/nyaapy/archive/0.1.tar.gz', license='MIT', diff --git a/tests/test.py b/tests/test.py index b323660..483bc34 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,15 +1,12 @@ -import json +import json, requests from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results def nyaa_search(): nyaa_query = Nyaa.search('koe no katachi 1080') - if len(nyaa_query) > 0: - for result in nyaa_query: - print(result['title']) - else: - print('Nothing here!') + for nyaa in nyaa_query: + print(nyaa) def nyaa_news(): news = Nyaa.news(5) @@ -35,7 +32,30 @@ def pantsu_news(): # Uncomment whatever you want to test -#nyaa_search() +nyaa_search() #pantsu_search() #nyaa_news() -pantsu_news() +#pantsu_news() + +''' r = requests.get("http://nyaa.si/") +soup = BeautifulSoup(r.text, 'html.parser') +rows = soup.select('table tr') + +torrents = [] + +for row in rows: + td = row.find_all('td') + torrent = [] + + for i in td: + if i.find('a'): + torrent.append(i.find('a').get('href')) + text = i.text.rstrip() + if len(text) > 0: + torrent.append(text) + else: + text = i.text.rstrip() + if len(text) > 0: + torrent.append(text) + + torrents.append(torrent) ''' \ No newline at end of file From b632bdda41d973cd4ceda8ecc8407623745bfd28 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Thu, 12 Oct 2017 23:08:00 +0200 Subject: [PATCH 04/13] fixed web scraping for nyaa.si --- NyaaPy/__init__.py | 98 ++++++++++++++++++++++++++++++++++------------ tests/test.py | 8 ++-- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index b0b3dbf..9062007 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -11,46 +11,96 @@ __copyright__ = '2017 Juanjo Salvador' __license__ = 'MIT license' class Nyaa(): - def search(keyword, category, subcategory, filters): - r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) + ''' + Return a list of dicts with the results of the query. + ''' + def search(keyword, category, subcategory, filters, page): + if page: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) + else: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) + soup = BeautifulSoup(r.text, 'html.parser') rows = soup.select('table tr') torrents = [] for row in rows: - td = row.find_all('td') - torrent = [] + block = [] - for i in td: - if i.find('a'): - torrent.append(i.find('a').get('href')) - text = i.text.rstrip() - if len(text) > 0: - torrent.append(text) - else: - text = i.text.rstrip() - if len(text) > 0: - torrent.append(text) + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) - torrents.append(torrent) - - print(torrents) + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'category': block[0].replace('/?c=', ''), + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + + torrents.append(torrent) + except IndexError: + print("Error! {}".format(block)) return torrents - ''' - Returns an array of OrderedDict with the n last updates of Nyaa.si + Returns an array of dicts with the n last updates of Nyaa.si ''' def news(n): - nyaa_baseurl = "https://nyaa.si/?page=rss" + r = requests.get("http://nyaa.si/") + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') - request = requests.get(nyaa_baseurl) - response = xmltodict.parse(request.text) + torrents = [] - results = response['rss']['channel']['item'] + for row in rows: + block = [] - return results[:n] + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'category': block[0].replace('/?c=', ''), + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + + torrents.append(torrent) + except IndexError: + print("Error! {}".format(block)) + + return torrents[:n] class NyaaPantsu(): ''' diff --git a/tests/test.py b/tests/test.py index 483bc34..d72f1d3 100644 --- a/tests/test.py +++ b/tests/test.py @@ -3,16 +3,16 @@ from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results def nyaa_search(): - nyaa_query = Nyaa.search('koe no katachi 1080') + nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 2) for nyaa in nyaa_query: - print(nyaa) + print(nyaa['date']) def nyaa_news(): news = Nyaa.news(5) for result in news: - print(result['title']) + print(result['name']) # Nyaa.pantsu.cat results def pantsu_search(): @@ -34,7 +34,7 @@ def pantsu_news(): nyaa_search() #pantsu_search() -#nyaa_news() +nyaa_news() #pantsu_news() ''' r = requests.get("http://nyaa.si/") From e6442d22fb0644ab8d0a462889d4b3a6a281ec57 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Thu, 12 Oct 2017 23:41:54 +0200 Subject: [PATCH 05/13] improved categories, fixed page selection --- NyaaPy/__init__.py | 71 ++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index 9062007..81b46d4 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -3,7 +3,7 @@ import json from bs4 import BeautifulSoup # Info about the module -__version__ = '0.4' +__version__ = '0.4.1' __author__ = 'Juanjo Salvador' __email__ = 'juanjosalvador@netc.eu' __url__ = 'http://juanjosalvador.me' @@ -25,37 +25,42 @@ class Nyaa(): torrents = [] - for row in rows: - block = [] + if rows: + for row in rows: + block = [] - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) - if td.text.rstrip(): - block.append(td.text.rstrip()) + if td.text.rstrip(): + block.append(td.text.rstrip()) - try: - torrent = { - 'category': block[0].replace('/?c=', ''), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - - torrents.append(torrent) - except IndexError: - print("Error! {}".format(block)) + try: + c = block[0].replace('/?c=', '') + cats = c.split('_') + + torrent = { + 'category': cats[0], + 'subcategory': cats[1], + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + + torrents.append(torrent) + except IndexError as ie: + pass return torrents ''' @@ -83,8 +88,12 @@ class Nyaa(): block.append(td.text.rstrip()) try: + c = block[0].replace('/?c=', '') + cats = c.split('_') + torrent = { - 'category': block[0].replace('/?c=', ''), + 'category': cats[0], + 'subcategory': cats[1], 'url': "http://nyaa.si{}".format(block[1]), 'name': block[2], 'download_url': "http://nyaa.si{}".format(block[4]), @@ -98,7 +107,7 @@ class Nyaa(): torrents.append(torrent) except IndexError: - print("Error! {}".format(block)) + pass return torrents[:n] From c0283c57c3533fe7654e5d79727e5d828a30093b Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 00:28:55 +0200 Subject: [PATCH 06/13] need to fix the cat - subcat extraction --- NyaaPy/__init__.py | 47 ++++++++++++++++++++++++++++++++++------------ tests/test.py | 31 ++++-------------------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index 81b46d4..3899790 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -1,5 +1,4 @@ import requests -import json from bs4 import BeautifulSoup # Info about the module @@ -15,7 +14,7 @@ class Nyaa(): Return a list of dicts with the results of the query. ''' def search(keyword, category, subcategory, filters, page): - if page: + if page > 0: r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) else: r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) @@ -41,12 +40,8 @@ class Nyaa(): block.append(td.text.rstrip()) try: - c = block[0].replace('/?c=', '') - cats = c.split('_') - torrent = { - 'category': cats[0], - 'subcategory': cats[1], + 'category': get_categories(block[0]), 'url': "http://nyaa.si{}".format(block[1]), 'name': block[2], 'download_url': "http://nyaa.si{}".format(block[4]), @@ -63,6 +58,7 @@ class Nyaa(): pass return torrents + ''' Returns an array of dicts with the n last updates of Nyaa.si ''' @@ -88,12 +84,8 @@ class Nyaa(): block.append(td.text.rstrip()) try: - c = block[0].replace('/?c=', '') - cats = c.split('_') - torrent = { - 'category': cats[0], - 'subcategory': cats[1], + 'category': get_categories(block[0]), 'url': "http://nyaa.si{}".format(block[1]), 'name': block[2], 'download_url': "http://nyaa.si{}".format(block[4]), @@ -109,6 +101,7 @@ class Nyaa(): except IndexError: pass + return torrents[:n] class NyaaPantsu(): @@ -148,3 +141,33 @@ class NyaaPantsu(): results = response['rss']['channel']['item'] return results[:n] + +# Auxiliar functions + +def get_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Anime", + "subcats": { + "1": "test", + "2": "test", + "3": "test", + "4": "test", + "5": "test", + "6": "test", + "7": "test" + } + }, + "2": "Audio", + "3": "Literature", + "4": "Live Action", + "5": "Pictures", + "6": "Software" + } + return "{} - {}".format() \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index d72f1d3..96c746f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -3,7 +3,7 @@ from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results def nyaa_search(): - nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 2) + nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0) for nyaa in nyaa_query: print(nyaa['date']) @@ -12,7 +12,7 @@ def nyaa_news(): news = Nyaa.news(5) for result in news: - print(result['name']) + print(result) # Nyaa.pantsu.cat results def pantsu_search(): @@ -32,30 +32,7 @@ def pantsu_news(): # Uncomment whatever you want to test -nyaa_search() +#nyaa_search() #pantsu_search() nyaa_news() -#pantsu_news() - -''' r = requests.get("http://nyaa.si/") -soup = BeautifulSoup(r.text, 'html.parser') -rows = soup.select('table tr') - -torrents = [] - -for row in rows: - td = row.find_all('td') - torrent = [] - - for i in td: - if i.find('a'): - torrent.append(i.find('a').get('href')) - text = i.text.rstrip() - if len(text) > 0: - torrent.append(text) - else: - text = i.text.rstrip() - if len(text) > 0: - torrent.append(text) - - torrents.append(torrent) ''' \ No newline at end of file +#pantsu_news() \ No newline at end of file From f88ff7ad56778a05a15690467e045e735e8ff5b4 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 20:37:46 +0200 Subject: [PATCH 07/13] refactoring and improvements (fixed #7) --- NyaaPy/__init__.py | 167 +------------------------------------------ NyaaPy/nyaa.py | 33 +++++++++ NyaaPy/nyaapantsu.py | 42 +++++++++++ NyaaPy/utils.py | 107 +++++++++++++++++++++++++++ tests/test.py | 12 ++-- 5 files changed, 189 insertions(+), 172 deletions(-) create mode 100644 NyaaPy/nyaa.py create mode 100644 NyaaPy/nyaapantsu.py create mode 100644 NyaaPy/utils.py diff --git a/NyaaPy/__init__.py b/NyaaPy/__init__.py index 3899790..9dd667f 100644 --- a/NyaaPy/__init__.py +++ b/NyaaPy/__init__.py @@ -1,6 +1,3 @@ -import requests -from bs4 import BeautifulSoup - # Info about the module __version__ = '0.4.1' __author__ = 'Juanjo Salvador' @@ -9,165 +6,5 @@ __url__ = 'http://juanjosalvador.me' __copyright__ = '2017 Juanjo Salvador' __license__ = 'MIT license' -class Nyaa(): - ''' - Return a list of dicts with the results of the query. - ''' - def search(keyword, category, subcategory, filters, page): - if page > 0: - r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) - else: - r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) - - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - torrents = [] - - if rows: - for row in rows: - block = [] - - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'category': get_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - - torrents.append(torrent) - except IndexError as ie: - pass - - return torrents - - ''' - Returns an array of dicts with the n last updates of Nyaa.si - ''' - def news(n): - r = requests.get("http://nyaa.si/") - soup = BeautifulSoup(r.text, 'html.parser') - rows = soup.select('table tr') - - torrents = [] - - for row in rows: - block = [] - - for td in row.find_all('td'): - if td.find_all('a'): - for link in td.find_all('a'): - if link.get('href')[-9:] != '#comments': - block.append(link.get('href')) - if link.text.rstrip(): - block.append(link.text) - - if td.text.rstrip(): - block.append(td.text.rstrip()) - - try: - torrent = { - 'category': get_categories(block[0]), - 'url': "http://nyaa.si{}".format(block[1]), - 'name': block[2], - 'download_url': "http://nyaa.si{}".format(block[4]), - 'magnet': block[5], - 'size': block[6], - 'date': block[7], - 'seeders': block[8], - 'leechers': block[9], - 'completed_downloads': block[10], - } - - torrents.append(torrent) - except IndexError: - pass - - - return torrents[:n] - -class NyaaPantsu(): - ''' - Make a query to nyaa.pantsu.cat using keyword as keyword. - Returns an array of OrderedDict with every result of the query. - Returns an empty array if no results. - ''' - def search(keyword): - nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" - - request = requests.get(nyaa_baseurl + keyword) - response = xmltodict.parse(request.text) - - results = [] - - try: - if type(response['rss']['channel']['item']) is collections.OrderedDict: - results.append(response['rss']['channel']['item']) - else: - results = response['rss']['channel']['item'] - - except KeyError as ex: - results = [] - - return results - - ''' - Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat - ''' - def news(n): - nyaa_baseurl = "https://nyaa.pantsu.cat/feed" - - request = requests.get(nyaa_baseurl) - response = xmltodict.parse(request.text) - - results = response['rss']['channel']['item'] - - return results[:n] - -# Auxiliar functions - -def get_categories(b): - c = b.replace('/?c=', '') - cats = c.split('_') - - cat = cats[0] - subcat = cats[1] - - categories = { - "1": { - "name": "Anime", - "subcats": { - "1": "test", - "2": "test", - "3": "test", - "4": "test", - "5": "test", - "6": "test", - "7": "test" - } - }, - "2": "Audio", - "3": "Literature", - "4": "Live Action", - "5": "Pictures", - "6": "Software" - } - return "{} - {}".format() \ No newline at end of file +from NyaaPy.nyaa import Nyaa +from NyaaPy.nyaapantsu import NyaaPantsu \ No newline at end of file diff --git a/NyaaPy/nyaa.py b/NyaaPy/nyaa.py new file mode 100644 index 0000000..c7eab47 --- /dev/null +++ b/NyaaPy/nyaa.py @@ -0,0 +1,33 @@ +import requests +from bs4 import BeautifulSoup +from NyaaPy.utils import Utils as utils + +class Nyaa(): + ''' + Return a list of dicts with the results of the query. + ''' + def search(keyword, category, subcategory, filters, page): + if page > 0: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) + else: + r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) + + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + results = {} + + if rows: + results = utils.parse_nyaa(rows, limit=None) + + return results + + ''' + Returns an array of dicts with the n last updates of Nyaa.si + ''' + def news(number_of_results): + r = requests.get("http://nyaa.si/") + soup = BeautifulSoup(r.text, 'html.parser') + rows = soup.select('table tr') + + return utils.parse_nyaa(rows, limit=number_of_results) \ No newline at end of file diff --git a/NyaaPy/nyaapantsu.py b/NyaaPy/nyaapantsu.py new file mode 100644 index 0000000..699bada --- /dev/null +++ b/NyaaPy/nyaapantsu.py @@ -0,0 +1,42 @@ +import requests +from bs4 import BeautifulSoup +from NyaaPy.utils import Utils as utils + + +class NyaaPantsu(): + ''' + Make a query to nyaa.pantsu.cat using keyword as keyword. + Returns an array of OrderedDict with every result of the query. + Returns an empty array if no results. + ''' + def search(keyword): + nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q=" + + request = requests.get(nyaa_baseurl + keyword) + response = xmltodict.parse(request.text) + + results = [] + + try: + if type(response['rss']['channel']['item']) is collections.OrderedDict: + results.append(response['rss']['channel']['item']) + else: + results = response['rss']['channel']['item'] + + except KeyError as ex: + results = [] + + return results + + ''' + Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat + ''' + def news(n): + nyaa_baseurl = "https://nyaa.pantsu.cat/feed" + + request = requests.get(nyaa_baseurl) + response = xmltodict.parse(request.text) + + results = response['rss']['channel']['item'] + + return results[:n] \ No newline at end of file diff --git a/NyaaPy/utils.py b/NyaaPy/utils.py new file mode 100644 index 0000000..253e9e0 --- /dev/null +++ b/NyaaPy/utils.py @@ -0,0 +1,107 @@ +''' + Module utils +''' + +class Utils(): + + def get_categories(b): + c = b.replace('/?c=', '') + cats = c.split('_') + + cat = cats[0] + subcat = cats[1] + + categories = { + "1": { + "name": "Anime", + "subcats": { + "1": "Anime Music Video", + "2": "English-translated", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "2": { + "name": "Audio", + "subcats": { + "1": "Lossless", + "2": "Lossy" + } + }, + "3": { + "name": "Literature", + "subcats": { + "1": "English-translated", + "2": "Non-English-translated", + "3": "Raw" + } + }, + "4": { + "name": "Live Action", + "subcats": { + "1": "English-translated", + "2": "Idol/Promotional Video", + "3": "Non-English-translated", + "4": "Raw" + } + }, + "5": { + "name": "Pictures", + "subcats": { + "1": "Graphics", + "2": "Photos" + } + }, + "6": { + "name": "Software", + "subcats": { + "1": "Applications", + "2": "Games" + } + } + } + + try: + category_name = "{} - {}".format(categories[cat]['name'], categories[cat]['subcats'][subcat]) + except: + pass + + return category_name + + def parse_nyaa(table_rows, limit): + + torrents = [] + + for row in table_rows[:limit]: + block = [] + + for td in row.find_all('td'): + if td.find_all('a'): + for link in td.find_all('a'): + if link.get('href')[-9:] != '#comments': + block.append(link.get('href')) + if link.text.rstrip(): + block.append(link.text) + + if td.text.rstrip(): + block.append(td.text.rstrip()) + + try: + torrent = { + 'category': Utils.get_categories(block[0]), + 'url': "http://nyaa.si{}".format(block[1]), + 'name': block[2], + 'download_url': "http://nyaa.si{}".format(block[4]), + 'magnet': block[5], + 'size': block[6], + 'date': block[7], + 'seeders': block[8], + 'leechers': block[9], + 'completed_downloads': block[10], + } + + torrents.append(torrent) + except IndexError as ie: + pass + + return torrents \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index 96c746f..c170112 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,18 +1,16 @@ -import json, requests from NyaaPy import Nyaa, NyaaPantsu # Nyaa.si results def nyaa_search(): - nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0) + nyaa_query = Nyaa.search(keyword='koe no katachi 1080', category=1, subcategory=0, filters=0, page=0) for nyaa in nyaa_query: - print(nyaa['date']) + print(nyaa) def nyaa_news(): - news = Nyaa.news(5) - - for result in news: - print(result) + news = Nyaa.news(number_of_results=5) + for n in news: + print(n) # Nyaa.pantsu.cat results def pantsu_search(): From ba2d259e6f00a882e3c3615b044e906d2df3aa18 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 21:45:28 +0200 Subject: [PATCH 08/13] Improved README (fixes #9) --- README.md | 145 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 120 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 4c7d9ed..552276b 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,132 @@ Unofficial Python module to search into Nyaa.si and nyaa.pantsu.cat. -Based on [Kylart's Nyaapi](https://github.com/Kylart/Nyaapi). +Supports Python 3+ + +* Installation +* Example +* Methods + * search() + * news() +* Categories and subcategories +* Contributions and development +* License -### Installation and ussage +## Installation Install it using pip. pip install nyaapy +## Example + +```python + from NyaaPy import Nyaa, NyaaPantsu + + nyaa_query = Nyaa.search(keyword='koe no katachi 1080', category=1, subcategory=0, filters=0, page=0) + + nyaa_news = Nyaa.news(5) + + if len(nyaa_query) > 0: + for result in nyaa_query: + print(result['title']) + else: + print('Nothing here!') + + for new in nyaa_news: + print(new['title]) +``` + +## Methods + +### search() + +Returns a list of dicts with the search results. + +Parameters: + +* **keyword**: String. Keyword for the search query. +* **category**: Integer. +* **subcategory**: Integer. +* **filters**: Integer. +* **page**: Integer. + +`page` must be between 0 and 1000. + +#### Dict returned for Nyaa.si + +```python + 'category': "Anime - English-translated", + 'url': "https://nyaa.si/view/968600", + 'name': "[HorribleSubs] Shoukoku no Altair - 14 [720p].mkv", + 'download_url': "https://nyaa.si/download/968600.torrent", + 'magnet': + 'size': "317.2 MiB", + 'date': "2017-10-13 20:16", + 'seeders': "538", + 'leechers': "286", + 'completed_downloads': "852" +``` + +### news() + +Parameters: + +* **number_of_results**: Integer + +`number_of_results` must be between 1 and 75. + + +## Categories and subcategories + +List of available categories and subcategories: + +1. Anime. + + 1.1 - Anime Music Video + + 1.2 - English-translated + + 1.3 - Non-English-translated + + 1.4 - Raw + +2. Audio. + + 2.1 - Lossless + + 2.2 - Lossy + +3. Literature. + + 3.1 - English-translated + + 3.2 - Non-English-translated + + 3.3 - Raw + +4. Live Action. + + 4.1 - English-translated + + 4.2 - Idol/Promotional Video + + 4.3 - Non-English-translated + + 4.4 - Raw + +5. Pictures. + + 5.1 - Graphics + + 5.2 - Photos + +6. Software. + + 6.1 - Applications + + 6.2 - Games ### Contributions and development @@ -35,28 +152,6 @@ At this moment there isn't an official Nyaa.si API, so we only can make requests 4. Always use the code into `src` folder, never the package. -### Example code - - from NyaaPy.nyaa import Nyaa - from NyaaPy.nyaa import NyaaPantsu - - # Nyaa.si results - nyaa_query = Nyaa.search('illo que pasa') - - if len(nyaa_query) > 0: - for result in nyaa_query: - print(result['title']) - else: - print('Nothing here!') - - # Nyaa.pantsu.cat results - pantsu_query = NyaaPantsu.search('new game') - if len(pantsu_query) > 0: - for result in pantsu_query: - print(result['title']) - else: - print('Nothing here!') - -### License +## License MIT license. From 546592ff914bd44ba918f4ac6f2f5b89e6485e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Salvador=20Piedra?= Date: Fri, 13 Oct 2017 21:49:49 +0200 Subject: [PATCH 09/13] Added anchors --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 552276b..c195006 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,14 @@ Unofficial Python module to search into Nyaa.si and nyaa.pantsu.cat. Supports Python 3+ -* Installation -* Example -* Methods - * search() - * news() -* Categories and subcategories -* Contributions and development -* License +* [Installation](#Installation) +* [Example](#Example) +* [Methods](#Methods) + * [search()](#search()) + * [news()](#news()) +* [Categories and subcategories](#Categories-and-subcategories) +* [Contributions and development](#Contributons-and-development) +* [License](#License) ## Installation From fa5418a7daa1c5dd5094e2eb4d21aeb0f0ca21d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Salvador=20Piedra?= Date: Fri, 13 Oct 2017 21:51:11 +0200 Subject: [PATCH 10/13] Fixed anchors --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c195006..ab09d0a 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,14 @@ Unofficial Python module to search into Nyaa.si and nyaa.pantsu.cat. Supports Python 3+ -* [Installation](#Installation) -* [Example](#Example) -* [Methods](#Methods) +* [Installation](#installation) +* [Example](#example) +* [Methods](#methods) * [search()](#search()) * [news()](#news()) -* [Categories and subcategories](#Categories-and-subcategories) -* [Contributions and development](#Contributons-and-development) -* [License](#License) +* [Categories and subcategories](#categories-and-subcategories) +* [Contributions and development](#contributons-and-development) +* [License](#license) ## Installation From 2a6bd191fed901d0e80fabcec7471c0fb06759a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Salvador=20Piedra?= Date: Fri, 13 Oct 2017 21:52:05 +0200 Subject: [PATCH 11/13] Fixed anchors (the good one) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ab09d0a..6d9937a 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ Supports Python 3+ * [Installation](#installation) * [Example](#example) * [Methods](#methods) - * [search()](#search()) - * [news()](#news()) + * [search()](#search) + * [news()](#news) * [Categories and subcategories](#categories-and-subcategories) * [Contributions and development](#contributons-and-development) * [License](#license) From bc0806fd9fd65ecc2688c1beb38ca7be95c84d49 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 22:23:23 +0200 Subject: [PATCH 12/13] updated requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8d986c4..12be134 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ chardet==3.0.4 idna==2.5 requests==2.18.3 urllib3==1.22 +beautifulsoup4==4.6.0 \ No newline at end of file From fc69a884e4fd0fc828407380e3bf170282e9d398 Mon Sep 17 00:00:00 2001 From: JuanjoSalvador Date: Fri, 13 Oct 2017 22:28:20 +0200 Subject: [PATCH 13/13] new requirements file (the good one) --- requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 12be134..8983f13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,2 @@ -certifi==2017.7.27.1 -chardet==3.0.4 -idna==2.5 -requests==2.18.3 -urllib3==1.22 +requests==2.18.1 beautifulsoup4==4.6.0 \ No newline at end of file