Improved categories (split into category and subcategory), fixed page selection

This commit is contained in:
JuanjoSalvador
2017-10-12 23:41:54 +02:00
parent b632bdda41
commit e6442d22fb

View File

@@ -3,7 +3,7 @@ import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# Info about the module # Info about the module
__version__ = '0.4' __version__ = '0.4.1'
__author__ = 'Juanjo Salvador' __author__ = 'Juanjo Salvador'
__email__ = 'juanjosalvador@netc.eu' __email__ = 'juanjosalvador@netc.eu'
__url__ = 'http://juanjosalvador.me' __url__ = 'http://juanjosalvador.me'
@@ -25,37 +25,42 @@ class Nyaa():
torrents = [] torrents = []
for row in rows: if rows:
block = [] for row in rows:
block = []
for td in row.find_all('td'): for td in row.find_all('td'):
if td.find_all('a'): if td.find_all('a'):
for link in td.find_all('a'): for link in td.find_all('a'):
if link.get('href')[-9:] != '#comments': if link.get('href')[-9:] != '#comments':
block.append(link.get('href')) block.append(link.get('href'))
if link.text.rstrip(): if link.text.rstrip():
block.append(link.text) block.append(link.text)
if td.text.rstrip(): if td.text.rstrip():
block.append(td.text.rstrip()) block.append(td.text.rstrip())
try: try:
torrent = { c = block[0].replace('/?c=', '')
'category': block[0].replace('/?c=', ''), cats = c.split('_')
'url': "http://nyaa.si{}".format(block[1]),
'name': block[2], torrent = {
'download_url': "http://nyaa.si{}".format(block[4]), 'category': cats[0],
'magnet': block[5], 'subcategory': cats[1],
'size': block[6], 'url': "http://nyaa.si{}".format(block[1]),
'date': block[7], 'name': block[2],
'seeders': block[8], 'download_url': "http://nyaa.si{}".format(block[4]),
'leechers': block[9], 'magnet': block[5],
'completed_downloads': block[10], 'size': block[6],
} 'date': block[7],
'seeders': block[8],
torrents.append(torrent) 'leechers': block[9],
except IndexError: 'completed_downloads': block[10],
print("Error! {}".format(block)) }
torrents.append(torrent)
except IndexError as ie:
pass
return torrents return torrents
''' '''
@@ -83,8 +88,12 @@ class Nyaa():
block.append(td.text.rstrip()) block.append(td.text.rstrip())
try: try:
c = block[0].replace('/?c=', '')
cats = c.split('_')
torrent = { torrent = {
'category': block[0].replace('/?c=', ''), 'category': cats[0],
'subcategory': cats[1],
'url': "http://nyaa.si{}".format(block[1]), 'url': "http://nyaa.si{}".format(block[1]),
'name': block[2], 'name': block[2],
'download_url': "http://nyaa.si{}".format(block[4]), 'download_url': "http://nyaa.si{}".format(block[4]),
@@ -98,7 +107,7 @@ class Nyaa():
torrents.append(torrent) torrents.append(torrent)
except IndexError: except IndexError:
print("Error! {}".format(block)) pass
return torrents[:n] return torrents[:n]