refactoring and improvements (fixed #7)

This commit is contained in:
JuanjoSalvador
2017-10-13 20:37:46 +02:00
parent c0283c57c3
commit f88ff7ad56
5 changed files with 189 additions and 172 deletions

View File

@@ -1,6 +1,3 @@
import requests
from bs4 import BeautifulSoup
# Package metadata: release version and author of the module.
__version__ = '0.4.1'
__author__ = 'Juanjo Salvador'
@@ -9,165 +6,5 @@ __url__ = 'http://juanjosalvador.me'
# Package metadata: copyright notice and license name.
__copyright__ = '2017 Juanjo Salvador'
__license__ = 'MIT license'
class Nyaa():
    """Scraper for the nyaa.si torrent listing pages."""

    @staticmethod
    def _parse_rows(rows):
        """Convert the ``<tr>`` rows of a listing table into torrent dicts.

        Each row is flattened into a list of link targets and cell texts;
        rows that do not provide all eleven expected fields are skipped.
        """
        torrents = []
        for row in rows:
            block = []
            for td in row.find_all('td'):
                for link in td.find_all('a'):
                    href = link.get('href')
                    # Links ending in "#comments" are left out of the block
                    # together with their text.
                    if href[-9:] != '#comments':
                        block.append(href)
                        if link.text.rstrip():
                            block.append(link.text)
                if td.text.rstrip():
                    block.append(td.text.rstrip())

            try:
                torrents.append({
                    'category': get_categories(block[0]),
                    'url': "http://nyaa.si{}".format(block[1]),
                    'name': block[2],
                    'download_url': "http://nyaa.si{}".format(block[4]),
                    'magnet': block[5],
                    'size': block[6],
                    'date': block[7],
                    'seeders': block[8],
                    'leechers': block[9],
                    'completed_downloads': block[10],
                })
            except IndexError:
                # Not enough fields in this row: it is not a torrent entry.
                continue
        return torrents

    @staticmethod
    def search(keyword, category, subcategory, filters, page):
        """Return a list of dicts with the results of the query.

        ``page`` is only sent to the site when it is greater than zero.
        """
        params = {
            'f': filters,
            'c': "{}_{}".format(category, subcategory),
            'q': keyword,
        }
        if page > 0:
            params['p'] = page

        # Passing the query through ``params`` lets requests URL-encode the
        # keyword (spaces, "&", "#", ...) instead of splicing it in raw.
        r = requests.get("http://nyaa.si/", params=params)
        soup = BeautifulSoup(r.text, 'html.parser')
        return Nyaa._parse_rows(soup.select('table tr'))

    @staticmethod
    def news(n):
        """Return a list of dicts with the n last updates of Nyaa.si."""
        r = requests.get("http://nyaa.si/")
        soup = BeautifulSoup(r.text, 'html.parser')
        return Nyaa._parse_rows(soup.select('table tr'))[:n]
class NyaaPantsu():
    """Client for the nyaa.pantsu.cat RSS feed."""

    @staticmethod
    def _items(response):
        """Return the feed entries of a parsed RSS document as a list."""
        try:
            items = response['rss']['channel']['item']
        except (KeyError, TypeError):
            # No "item" entry in the parsed feed: there are no results.
            return []
        # A lone entry is parsed as a single mapping rather than a list;
        # wrap it so callers always receive a list.
        return items if isinstance(items, list) else [items]

    @staticmethod
    def search(keyword):
        """Query nyaa.pantsu.cat for ``keyword``.

        Returns a list with one mapping per result, or an empty list when
        the query has no results.
        """
        # Imported locally: the module-level imports do not provide it.
        import xmltodict

        params = {'c': '_', 's': 0, 'max': 99999, 'userID': 0, 'q': keyword}
        # ``params`` makes requests URL-encode the keyword.
        request = requests.get("https://nyaa.pantsu.cat/feed", params=params)
        response = xmltodict.parse(request.text)
        return NyaaPantsu._items(response)

    @staticmethod
    def news(n):
        """Return a list with the n last updates of nyaa.pantsu.cat."""
        # Imported locally: the module-level imports do not provide it.
        import xmltodict

        request = requests.get("https://nyaa.pantsu.cat/feed")
        response = xmltodict.parse(request.text)
        return NyaaPantsu._items(response)[:n]
# Auxiliary functions
def get_categories(b):
    """Translate a category link such as ``/?c=1_2`` into a readable name.

    Returns ``"<category> - <subcategory>"`` when both ids are known, the
    category name alone when only the subcategory is unknown, and the bare
    ``<cat>_<subcat>`` code when the category itself is unknown.
    """
    code = b.replace('/?c=', '')
    parts = code.split('_')
    cat = parts[0]
    subcat = parts[1] if len(parts) > 1 else ''

    # Every category uses the same {"name", "subcats"} shape so the lookup
    # below does not need to special-case any entry.
    categories = {
        "1": {
            "name": "Anime",
            "subcats": {
                "1": "Anime Music Video",
                "2": "English-translated",
                "3": "Non-English-translated",
                "4": "Raw",
            },
        },
        "2": {
            "name": "Audio",
            "subcats": {
                "1": "Lossless",
                "2": "Lossy",
            },
        },
        "3": {
            "name": "Literature",
            "subcats": {
                "1": "English-translated",
                "2": "Non-English-translated",
                "3": "Raw",
            },
        },
        "4": {
            "name": "Live Action",
            "subcats": {
                "1": "English-translated",
                "2": "Idol/Promotional Video",
                "3": "Non-English-translated",
                "4": "Raw",
            },
        },
        "5": {
            "name": "Pictures",
            "subcats": {
                "1": "Graphics",
                "2": "Photos",
            },
        },
        "6": {
            "name": "Software",
            "subcats": {
                "1": "Applications",
                "2": "Games",
            },
        },
    }

    category = categories.get(cat)
    if category is None:
        return code
    subcat_name = category["subcats"].get(subcat)
    if subcat_name is None:
        return category["name"]
    return "{} - {}".format(category["name"], subcat_name)
from NyaaPy.nyaa import Nyaa
from NyaaPy.nyaapantsu import NyaaPantsu

33
NyaaPy/nyaa.py Normal file
View File

@@ -0,0 +1,33 @@
import requests
from bs4 import BeautifulSoup
from NyaaPy.utils import Utils as utils
class Nyaa():
    """Scraper for the nyaa.si torrent listing pages."""

    @staticmethod
    def search(keyword, category, subcategory, filters, page):
        """Return a list of dicts with the results of the query.

        ``page`` is only sent to the site when it is greater than zero.
        An empty list is returned when the page contains no table rows.
        """
        params = {
            'f': filters,
            'c': "{}_{}".format(category, subcategory),
            'q': keyword,
        }
        if page > 0:
            params['p'] = page

        # Passing the query through ``params`` lets requests URL-encode the
        # keyword (spaces, "&", "#", ...) instead of splicing it in raw.
        r = requests.get("http://nyaa.si/", params=params)
        soup = BeautifulSoup(r.text, 'html.parser')
        rows = soup.select('table tr')

        # parse_nyaa yields an empty list for empty input, so the result is
        # always a list.
        return utils.parse_nyaa(rows, limit=None)

    @staticmethod
    def news(number_of_results):
        """Return a list of dicts with the latest updates of Nyaa.si.

        ``number_of_results`` is forwarded to ``parse_nyaa`` as its limit.
        """
        r = requests.get("http://nyaa.si/")
        soup = BeautifulSoup(r.text, 'html.parser')
        rows = soup.select('table tr')

        return utils.parse_nyaa(rows, limit=number_of_results)

42
NyaaPy/nyaapantsu.py Normal file
View File

@@ -0,0 +1,42 @@
import requests
from bs4 import BeautifulSoup
from NyaaPy.utils import Utils as utils
class NyaaPantsu():
    """Client for the nyaa.pantsu.cat RSS feed."""

    @staticmethod
    def _items(response):
        """Return the feed entries of a parsed RSS document as a list."""
        try:
            items = response['rss']['channel']['item']
        except (KeyError, TypeError):
            # No "item" entry in the parsed feed: there are no results.
            return []
        # A lone entry is parsed as a single mapping rather than a list;
        # wrap it so callers always receive a list.
        return items if isinstance(items, list) else [items]

    @staticmethod
    def search(keyword):
        """Query nyaa.pantsu.cat for ``keyword``.

        Returns a list with one mapping per result, or an empty list when
        the query has no results.
        """
        # Imported locally: the module-level imports do not provide it.
        import xmltodict

        params = {'c': '_', 's': 0, 'max': 99999, 'userID': 0, 'q': keyword}
        # ``params`` makes requests URL-encode the keyword.
        request = requests.get("https://nyaa.pantsu.cat/feed", params=params)
        response = xmltodict.parse(request.text)
        return NyaaPantsu._items(response)

    @staticmethod
    def news(n):
        """Return a list with the n last updates of nyaa.pantsu.cat."""
        # Imported locally: the module-level imports do not provide it.
        import xmltodict

        request = requests.get("https://nyaa.pantsu.cat/feed")
        response = xmltodict.parse(request.text)
        return NyaaPantsu._items(response)[:n]

107
NyaaPy/utils.py Normal file
View File

@@ -0,0 +1,107 @@
'''
Module utils
'''
class Utils():
    """Helpers shared by the NyaaPy scrapers."""

    # Category id -> display name and subcategory names, keyed by the id
    # strings found in "/?c=<cat>_<subcat>" links.
    CATEGORIES = {
        "1": {
            "name": "Anime",
            "subcats": {
                "1": "Anime Music Video",
                "2": "English-translated",
                "3": "Non-English-translated",
                "4": "Raw",
            },
        },
        "2": {
            "name": "Audio",
            "subcats": {
                "1": "Lossless",
                "2": "Lossy",
            },
        },
        "3": {
            "name": "Literature",
            "subcats": {
                "1": "English-translated",
                "2": "Non-English-translated",
                "3": "Raw",
            },
        },
        "4": {
            "name": "Live Action",
            "subcats": {
                "1": "English-translated",
                "2": "Idol/Promotional Video",
                "3": "Non-English-translated",
                "4": "Raw",
            },
        },
        "5": {
            "name": "Pictures",
            "subcats": {
                "1": "Graphics",
                "2": "Photos",
            },
        },
        "6": {
            "name": "Software",
            "subcats": {
                "1": "Applications",
                "2": "Games",
            },
        },
    }

    @staticmethod
    def get_categories(b):
        """Translate a category link such as ``/?c=1_2`` into a readable name.

        Returns ``"<category> - <subcategory>"`` when both ids are known,
        the category name alone when only the subcategory is unknown, and
        the bare ``<cat>_<subcat>`` code when the category is unknown.
        """
        code = b.replace('/?c=', '')
        parts = code.split('_')
        cat = parts[0]
        subcat = parts[1] if len(parts) > 1 else ''

        category = Utils.CATEGORIES.get(cat)
        if category is None:
            return code
        subcat_name = category['subcats'].get(subcat)
        if subcat_name is None:
            return category['name']
        return "{} - {}".format(category['name'], subcat_name)

    @staticmethod
    def parse_nyaa(table_rows, limit):
        """Convert the ``<tr>`` rows of a listing table into torrent dicts.

        Each row is flattened into a list of link targets and cell texts;
        rows that do not provide all eleven expected fields are skipped.

        ``limit`` caps the number of torrents returned (``None`` means no
        cap). It is applied to the parsed torrents rather than to the raw
        rows, so skipped rows do not count against it.
        """
        torrents = []

        for row in table_rows:
            block = []

            for td in row.find_all('td'):
                for link in td.find_all('a'):
                    href = link.get('href')
                    # Anchors without a target, and links ending in
                    # "#comments", are left out together with their text.
                    if href is None or href.endswith('#comments'):
                        continue
                    block.append(href)
                    if link.text.rstrip():
                        block.append(link.text)

                if td.text.rstrip():
                    block.append(td.text.rstrip())

            # Indices 0..10 are read below; anything shorter is not a
            # torrent entry.
            if len(block) < 11:
                continue

            torrents.append({
                'category': Utils.get_categories(block[0]),
                'url': "http://nyaa.si{}".format(block[1]),
                'name': block[2],
                'download_url': "http://nyaa.si{}".format(block[4]),
                'magnet': block[5],
                'size': block[6],
                'date': block[7],
                'seeders': block[8],
                'leechers': block[9],
                'completed_downloads': block[10],
            })

        return torrents[:limit]

View File

@@ -1,18 +1,16 @@
import json, requests
from NyaaPy import Nyaa, NyaaPantsu
# Nyaa.si results
def nyaa_search():
    """Run a sample Nyaa.si search and print every result."""
    # Keyword arguments keep the call readable and independent of the
    # parameter order of Nyaa.search; the query is issued only once.
    nyaa_query = Nyaa.search(keyword='koe no katachi 1080', category=1, subcategory=0, filters=0, page=0)

    for nyaa in nyaa_query:
        print(nyaa)
def nyaa_news():
    """Fetch the five latest Nyaa.si entries and print each one."""
    # A single request is enough; the results are printed once.
    news = Nyaa.news(number_of_results=5)

    for n in news:
        print(n)
# Nyaa.pantsu.cat results
def pantsu_search():