Web Scraping WIP, added category, subcategory and filters
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
import requests
|
||||
import xmltodict
|
||||
import json
|
||||
import collections
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Info about the module
|
||||
__version__ = '0.4'
|
||||
@@ -12,29 +11,33 @@ __copyright__ = '2017 Juanjo Salvador'
|
||||
__license__ = 'MIT license'
|
||||
|
||||
class Nyaa():
|
||||
'''
|
||||
Make a query to nyaa.si using keyword as keyword.
|
||||
Returns an array of OrderedDict with every result of the query.
|
||||
Returns an empty array if no results.
|
||||
'''
|
||||
def search(keyword):
|
||||
nyaa_baseurl = "https://nyaa.si/?page=rss&c=1_0&f=0&q="
|
||||
def search(keyword, category, subcategory, filters):
|
||||
r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword))
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
rows = soup.select('table tr')
|
||||
|
||||
request = requests.get(nyaa_baseurl + keyword)
|
||||
response = xmltodict.parse(request.text)
|
||||
torrents = []
|
||||
|
||||
results = []
|
||||
for row in rows:
|
||||
td = row.find_all('td')
|
||||
torrent = []
|
||||
|
||||
try:
|
||||
if type(response['rss']['channel']['item']) is collections.OrderedDict:
|
||||
results.append(response['rss']['channel']['item'])
|
||||
else:
|
||||
results = response['rss']['channel']['item']
|
||||
for i in td:
|
||||
if i.find('a'):
|
||||
torrent.append(i.find('a').get('href'))
|
||||
text = i.text.rstrip()
|
||||
if len(text) > 0:
|
||||
torrent.append(text)
|
||||
else:
|
||||
text = i.text.rstrip()
|
||||
if len(text) > 0:
|
||||
torrent.append(text)
|
||||
|
||||
except KeyError as ex:
|
||||
results = []
|
||||
torrents.append(torrent)
|
||||
|
||||
print(torrents)
|
||||
|
||||
return results
|
||||
return torrents
|
||||
|
||||
'''
|
||||
Returns an array of OrderedDict with the n last updates of Nyaa.si
|
||||
|
||||
2
setup.py
2
setup.py
@@ -1,7 +1,7 @@
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(name='nyaapy',
|
||||
version='0.4',
|
||||
version='0.4.1',
|
||||
url='https://github.com/juanjosalvador/nyaapy',
|
||||
download_url = 'https://github.com/juanjosalvador/nyaapy/archive/0.1.tar.gz',
|
||||
license='MIT',
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import json
|
||||
import json, requests
|
||||
from NyaaPy import Nyaa, NyaaPantsu
|
||||
|
||||
# Nyaa.si results
|
||||
def nyaa_search():
|
||||
nyaa_query = Nyaa.search('koe no katachi 1080')
|
||||
|
||||
if len(nyaa_query) > 0:
|
||||
for result in nyaa_query:
|
||||
print(result['title'])
|
||||
else:
|
||||
print('Nothing here!')
|
||||
for nyaa in nyaa_query:
|
||||
print(nyaa)
|
||||
|
||||
def nyaa_news():
|
||||
news = Nyaa.news(5)
|
||||
@@ -35,7 +32,30 @@ def pantsu_news():
|
||||
|
||||
# Uncomment whatever you want to test
|
||||
|
||||
#nyaa_search()
|
||||
nyaa_search()
|
||||
#pantsu_search()
|
||||
#nyaa_news()
|
||||
pantsu_news()
|
||||
#pantsu_news()
|
||||
|
||||
''' r = requests.get("http://nyaa.si/")
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
rows = soup.select('table tr')
|
||||
|
||||
torrents = []
|
||||
|
||||
for row in rows:
|
||||
td = row.find_all('td')
|
||||
torrent = []
|
||||
|
||||
for i in td:
|
||||
if i.find('a'):
|
||||
torrent.append(i.find('a').get('href'))
|
||||
text = i.text.rstrip()
|
||||
if len(text) > 0:
|
||||
torrent.append(text)
|
||||
else:
|
||||
text = i.text.rstrip()
|
||||
if len(text) > 0:
|
||||
torrent.append(text)
|
||||
|
||||
torrents.append(torrent) '''
|
||||
Reference in New Issue
Block a user