need to fix the cat - subcat extraction

This commit is contained in:
JuanjoSalvador
2017-10-13 00:28:55 +02:00
parent e6442d22fb
commit c0283c57c3
2 changed files with 39 additions and 39 deletions

View File

@@ -1,5 +1,4 @@
import requests import requests
import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# Info about the module # Info about the module
@@ -15,7 +14,7 @@ class Nyaa():
Return a list of dicts with the results of the query. Return a list of dicts with the results of the query.
''' '''
def search(keyword, category, subcategory, filters, page): def search(keyword, category, subcategory, filters, page):
if page: if page > 0:
r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page)) r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page))
else: else:
r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword)) r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword))
@@ -41,12 +40,8 @@ class Nyaa():
block.append(td.text.rstrip()) block.append(td.text.rstrip())
try: try:
c = block[0].replace('/?c=', '')
cats = c.split('_')
torrent = { torrent = {
'category': cats[0], 'category': get_categories(block[0]),
'subcategory': cats[1],
'url': "http://nyaa.si{}".format(block[1]), 'url': "http://nyaa.si{}".format(block[1]),
'name': block[2], 'name': block[2],
'download_url': "http://nyaa.si{}".format(block[4]), 'download_url': "http://nyaa.si{}".format(block[4]),
@@ -63,6 +58,7 @@ class Nyaa():
pass pass
return torrents return torrents
''' '''
Returns an array of dicts with the n last updates of Nyaa.si Returns an array of dicts with the n last updates of Nyaa.si
''' '''
@@ -88,12 +84,8 @@ class Nyaa():
block.append(td.text.rstrip()) block.append(td.text.rstrip())
try: try:
c = block[0].replace('/?c=', '')
cats = c.split('_')
torrent = { torrent = {
'category': cats[0], 'category': get_categories(block[0]),
'subcategory': cats[1],
'url': "http://nyaa.si{}".format(block[1]), 'url': "http://nyaa.si{}".format(block[1]),
'name': block[2], 'name': block[2],
'download_url': "http://nyaa.si{}".format(block[4]), 'download_url': "http://nyaa.si{}".format(block[4]),
@@ -109,6 +101,7 @@ class Nyaa():
except IndexError: except IndexError:
pass pass
return torrents[:n] return torrents[:n]
class NyaaPantsu(): class NyaaPantsu():
@@ -148,3 +141,33 @@ class NyaaPantsu():
results = response['rss']['channel']['item'] results = response['rss']['channel']['item']
return results[:n] return results[:n]
# Auxiliary functions
def get_categories(b):
    """Turn a category link into a "category - subcategory" label.

    Args:
        b: Category reference such as ``"/?c=1_2"`` (or the bare ``"1_2"``),
            i.e. a category id and a subcategory id joined by ``_``.

    Returns:
        A string ``"<category> - <subcategory>"``. Ids that have no entry in
        the lookup table are emitted unchanged, so ``"/?c=9_3"`` yields
        ``"9 - 3"``.

    Raises:
        IndexError: If ``b`` contains no ``_`` separated subcategory part.
            The callers in this file build their result dict inside a
            ``try``/``except IndexError`` and skip the row in that case.
    """
    # Drop the query prefix so only "<cat>_<subcat>" remains.
    c = b.replace('/?c=', '')
    cats = c.split('_')
    cat = cats[0]
    subcat = cats[1]

    # NOTE(review): the subcategory names are still "test" placeholders and
    # only category "1" has a subcategory table -- fill in the real names.
    categories = {
        "1": {
            "name": "Anime",
            "subcats": {
                "1": "test",
                "2": "test",
                "3": "test",
                "4": "test",
                "5": "test",
                "6": "test",
                "7": "test"
            }
        },
        "2": "Audio",
        "3": "Literature",
        "4": "Live Action",
        "5": "Pictures",
        "6": "Software"
    }

    # Entries are either a nested dict (name + subcats) or a plain name;
    # unknown ids fall back to the raw id instead of failing.
    entry = categories.get(cat, cat)
    if isinstance(entry, dict):
        cat_name = entry["name"]
        subcat_name = entry["subcats"].get(subcat, subcat)
    else:
        cat_name = entry
        subcat_name = subcat

    # The original called format() with no arguments, which always raised
    # IndexError and made the callers silently drop every result row.
    return "{} - {}".format(cat_name, subcat_name)

View File

@@ -3,7 +3,7 @@ from NyaaPy import Nyaa, NyaaPantsu
# Nyaa.si results # Nyaa.si results
def nyaa_search(): def nyaa_search():
nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 2) nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0)
for nyaa in nyaa_query: for nyaa in nyaa_query:
print(nyaa['date']) print(nyaa['date'])
@@ -12,7 +12,7 @@ def nyaa_news():
news = Nyaa.news(5) news = Nyaa.news(5)
for result in news: for result in news:
print(result['name']) print(result)
# Nyaa.pantsu.cat results # Nyaa.pantsu.cat results
def pantsu_search(): def pantsu_search():
@@ -32,30 +32,7 @@ def pantsu_news():
# Uncomment whatever you want to test # Uncomment whatever you want to test
nyaa_search() #nyaa_search()
#pantsu_search() #pantsu_search()
nyaa_news() nyaa_news()
#pantsu_news() #pantsu_news()
''' r = requests.get("http://nyaa.si/")
soup = BeautifulSoup(r.text, 'html.parser')
rows = soup.select('table tr')
torrents = []
for row in rows:
td = row.find_all('td')
torrent = []
for i in td:
if i.find('a'):
torrent.append(i.find('a').get('href'))
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
else:
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
torrents.append(torrent) '''