Need to fix the category - subcategory extraction

2017-10-13 00:28:55 +02:00
parent e6442d22fb
commit c0283c57c3
2 changed files with 39 additions and 39 deletions
--- a/NyaaPy/init.py
+++ b/NyaaPy/init.py
@@ -1,5 +1,4 @@
 import requests
 import json
 from bs4 import BeautifulSoup
 # Info about the module
@@ -15,7 +14,7 @@ class Nyaa():
     Return a list of dicts with the results of the query.
    '''
    def search(keyword, category, subcategory, filters, page):
-        if page:
+        if page > 0:
            r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page))
        else:
            r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword))
@@ -41,12 +40,8 @@ class Nyaa():
                        block.append(td.text.rstrip())
                try:
                    c = block[0].replace('/?c=', '')
                    cats = c.split('_')
                    torrent = {
-                        'category': cats[0],
+                        'category': get_categories(block[0]),
                        'subcategory': cats[1],
                        'url': "http://nyaa.si{}".format(block[1]),
                        'name': block[2],
                        'download_url': "http://nyaa.si{}".format(block[4]),
@@ -63,6 +58,7 @@ class Nyaa():
                    pass
        return torrents
    '''
     Returns an array of dicts with the n last updates of Nyaa.si
    '''
@@ -88,12 +84,8 @@ class Nyaa():
                    block.append(td.text.rstrip())
            try:
                c = block[0].replace('/?c=', '')
                cats = c.split('_')
                torrent = {
-                    'category': cats[0],
+                    'category': get_categories(block[0]),
                    'subcategory': cats[1],
                    'url': "http://nyaa.si{}".format(block[1]),
                    'name': block[2],
                    'download_url': "http://nyaa.si{}".format(block[4]),
@@ -109,6 +101,7 @@ class Nyaa():
            except IndexError:
                pass
        return torrents[:n]
 class NyaaPantsu():
@@ -148,3 +141,33 @@ class NyaaPantsu():
        results = response['rss']['channel']['item']
        return results[:n]
 # Auxiliary functions
 def get_categories(b):
    '''
    Build a "<category> - <subcategory>" label from a category link.

    b is a string such as "/?c=1_2": the "/?c=" prefix is removed and the
    remainder is split on "_" into a category id and a subcategory id.
    Ids that have an entry in the lookup table below are replaced by that
    entry's text; ids without an entry are kept unchanged in the label.

    Raises IndexError when b contains no "_" separator.
    '''
    c = b.replace('/?c=', '')
    cats = c.split('_')
    cat = cats[0]
    # Left unguarded on purpose: a value without "_" raises IndexError,
    # which the row-parsing code calling this helper catches to skip the row.
    subcat = cats[1]
    # NOTE(review): the Anime subcategory names are still "test" placeholders
    # and categories 2-6 have no subcategory table - fill in the real names.
    categories = {
        "1": {
            "name": "Anime",
            "subcats": {
                "1": "test",
                "2": "test",
                "3": "test",
                "4": "test",
                "5": "test",
                "6": "test",
                "7": "test"
            }
        },
        "2": "Audio",
        "3": "Literature",
        "4": "Live Action",
        "5": "Pictures",
        "6": "Software"
    }
    # An entry is either a dict (name + subcategory table) or a plain name.
    entry = categories.get(cat, cat)
    if isinstance(entry, dict):
        cat_name = entry["name"]
        subcat_name = entry["subcats"].get(subcat, subcat)
    else:
        cat_name = entry
        subcat_name = subcat
    # The original called format() with no arguments, which always raised
    # IndexError and made every parsed row get silently discarded.
    return "{} - {}".format(cat_name, subcat_name)
--- a/tests/test.py
+++ b/tests/test.py
@@ -3,7 +3,7 @@ from NyaaPy import Nyaa, NyaaPantsu
 # Nyaa.si results
 def nyaa_search():
-    nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 2)
+    nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0)
    for nyaa in nyaa_query:
        print(nyaa['date'])
@@ -12,7 +12,7 @@ def nyaa_news():
    news = Nyaa.news(5)
    for result in news:
-        print(result['name'])
+        print(result)
 # Nyaa.pantsu.cat results
 def pantsu_search():
@@ -32,30 +32,7 @@ def pantsu_news():
 # Uncomment whatever you want to test
-nyaa_search()
+#nyaa_search()
 #pantsu_search()
 nyaa_news()
 #pantsu_news()
 ''' r = requests.get("http://nyaa.si/")
 soup = BeautifulSoup(r.text, 'html.parser')
 rows = soup.select('table tr')
 torrents = []
 for row in rows:
    td = row.find_all('td')
    torrent = []
    for i in td:
        if i.find('a'):
            torrent.append(i.find('a').get('href'))
            text = i.text.rstrip()
            if len(text) > 0:
                torrent.append(text)
        else:
            text = i.text.rstrip()
            if len(text) > 0:
                torrent.append(text)
        torrents.append(torrent) '''