Web Scraping WIP, added category, subcategory and filters

This commit is contained in:
JuanjoSalvador
2017-10-11 00:04:10 +02:00
parent c18bdb736b
commit 5cb2079188
3 changed files with 52 additions and 29 deletions

View File

@@ -1,7 +1,6 @@
import requests
import xmltodict
import json
import collections
from bs4 import BeautifulSoup
# Info about the module
__version__ = '0.4'
@@ -12,29 +11,33 @@ __copyright__ = '2017 Juanjo Salvador'
__license__ = 'MIT license'
class Nyaa():
'''
Query nyaa.si using the given keyword as the search term.
Returns an array of OrderedDict with every result of the query.
Returns an empty array if no results.
'''
def search(keyword):
nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q="
def search(keyword, category, subcategory, filters):
r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword))
soup = BeautifulSoup(r.text, 'html.parser')
rows = soup.select('table tr')
request = requests.get(nyaa_baseurl + keyword)
response = xmltodict.parse(request.text)
torrents = []
results = []
for row in rows:
td = row.find_all('td')
torrent = []
try:
if type(response['rss']['channel']['item']) is collections.OrderedDict:
results.append(response['rss']['channel']['item'])
else:
results = response['rss']['channel']['item']
for i in td:
if i.find('a'):
torrent.append(i.find('a').get('href'))
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
else:
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
except KeyError as ex:
results = []
torrents.append(torrent)
print(torrents)
return results
return torrents
'''
Returns an array of OrderedDict with the n most recent updates from Nyaa.si

View File

@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
setup(name='nyaapy',
version='0.4',
version='0.4.1',
url='https://github.com/juanjosalvador/nyaapy',
download_url = 'https://github.com/juanjosalvador/nyaapy/archive/0.1.tar.gz',
license='MIT',

View File

@@ -1,15 +1,12 @@
import json
import json, requests
from NyaaPy import Nyaa, NyaaPantsu
# Nyaa.si results
def nyaa_search():
nyaa_query = Nyaa.search('koe no katachi 1080')
if len(nyaa_query) > 0:
for result in nyaa_query:
print(result['title'])
else:
print('Nothing here!')
for nyaa in nyaa_query:
print(nyaa)
def nyaa_news():
news = Nyaa.news(5)
@@ -35,7 +32,30 @@ def pantsu_news():
# Uncomment whatever you want to test
#nyaa_search()
nyaa_search()
#pantsu_search()
#nyaa_news()
pantsu_news()
#pantsu_news()
''' r = requests.get("http://nyaa.si/")
soup = BeautifulSoup(r.text, 'html.parser')
rows = soup.select('table tr')
torrents = []
for row in rows:
td = row.find_all('td')
torrent = []
for i in td:
if i.find('a'):
torrent.append(i.find('a').get('href'))
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
else:
text = i.text.rstrip()
if len(text) > 0:
torrent.append(text)
torrents.append(torrent) '''