refactoring and improvements (fixed #7)

2017-10-13 20:37:46 +02:00
parent c0283c57c3
commit f88ff7ad56
5 changed files with 189 additions and 172 deletions
--- a/NyaaPy/init.py
+++ b/NyaaPy/init.py
@@ -1,6 +1,3 @@
-import requests
-from bs4 import BeautifulSoup
-
 # Info about the module
 __version__   = '0.4.1'
 __author__    = 'Juanjo Salvador'
@@ -9,165 +6,5 @@ __url__       = 'http://juanjosalvador.me'
 __copyright__ = '2017 Juanjo Salvador'
 __license__   = 'MIT license'

-class Nyaa():
-    '''
-     Return a list of dicts with the results of the query.
-    '''
-    def search(keyword, category, subcategory, filters, page):
-        if page > 0:
-            r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}&p={}".format(filters, category, subcategory, keyword, page))
-        else:
-            r = requests.get("http://nyaa.si/?f={}&c={}_{}&q={}".format(filters, category, subcategory, keyword))
-
-        soup = BeautifulSoup(r.text, 'html.parser')
-        rows = soup.select('table tr')
-
-        torrents = []
-
-        if rows:
-            for row in rows:
-                block = []
-
-                for td in row.find_all('td'):
-                    if td.find_all('a'):
-                        for link in td.find_all('a'):
-                            if link.get('href')[-9:] != '#comments':
-                                block.append(link.get('href'))
-                                if link.text.rstrip():
-                                    block.append(link.text)
-
-                    if td.text.rstrip():
-                        block.append(td.text.rstrip())
-
-                try:
-                    torrent = {
-                        'category': get_categories(block[0]),
-                        'url': "http://nyaa.si{}".format(block[1]),
-                        'name': block[2],
-                        'download_url': "http://nyaa.si{}".format(block[4]),
-                        'magnet': block[5],
-                        'size': block[6],
-                        'date': block[7],
-                        'seeders': block[8],
-                        'leechers': block[9],
-                        'completed_downloads': block[10],
-                    }
-                
-                    torrents.append(torrent)
-                except IndexError as ie:
-                    pass
-
-        return torrents
-    
-    '''
-     Returns an array of dicts with the n last updates of Nyaa.si
-    '''
-    def news(n):
-        r = requests.get("http://nyaa.si/")
-        soup = BeautifulSoup(r.text, 'html.parser')
-        rows = soup.select('table tr')
-
-        torrents = []
-
-        for row in rows:
-            block = []
-
-            for td in row.find_all('td'):
-                if td.find_all('a'):
-                    for link in td.find_all('a'):
-                        if link.get('href')[-9:] != '#comments':
-                            block.append(link.get('href'))
-                            if link.text.rstrip():
-                                block.append(link.text)
-
-                if td.text.rstrip():
-                    block.append(td.text.rstrip())
-
-            try:
-                torrent = {
-                    'category': get_categories(block[0]),
-                    'url': "http://nyaa.si{}".format(block[1]),
-                    'name': block[2],
-                    'download_url': "http://nyaa.si{}".format(block[4]),
-                    'magnet': block[5],
-                    'size': block[6],
-                    'date': block[7],
-                    'seeders': block[8],
-                    'leechers': block[9],
-                    'completed_downloads': block[10],
-                }
-            
-                torrents.append(torrent)
-            except IndexError:
-                pass
-
-
-        return torrents[:n]
-
-class NyaaPantsu():
-    '''
-     Make a query to nyaa.pantsu.cat using keyword as keyword.
-     Returns an array of OrderedDict with every result of the query.
-     Returns an empty array if no results.
-    '''
-    def search(keyword):
-        nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0&q="
-
-        request  = requests.get(nyaa_baseurl + keyword)
-        response = xmltodict.parse(request.text)
-
-        results = []
-
-        try:
-            if type(response['rss']['channel']['item']) is collections.OrderedDict:
-                results.append(response['rss']['channel']['item'])
-            else:
-                results = response['rss']['channel']['item']
-
-        except KeyError as ex:
-            results = []
-
-        return results
-
-    '''
-     Returns an array of OrderedDict with the n last updates of nyaa.pantsu.cat
-    '''
-    def news(n):
-        nyaa_baseurl = "https://nyaa.pantsu.cat/feed"
-
-        request  = requests.get(nyaa_baseurl)
-        response = xmltodict.parse(request.text)
-
-        results = response['rss']['channel']['item']
-
-        return results[:n]
-
-# Auxiliar functions
-
-def get_categories(b):
-    c = b.replace('/?c=', '')
-    cats = c.split('_')
-
-    cat = cats[0]
-    subcat = cats[1]
-
-    categories = {
-        "1": {
-            "name": "Anime",
-            "subcats": {
-                "1": "test",
-                "2": "test",
-                "3": "test",
-                "4": "test",
-                "5": "test",
-                "6": "test",
-                "7": "test"
-            }
-        },
-        "2": "Audio",
-        "3": "Literature",
-        "4": "Live Action",
-        "5": "Pictures",
-        "6": "Software"
-    }
-    return "{} - {}".format()
+from NyaaPy.nyaa import Nyaa
+from NyaaPy.nyaapantsu import NyaaPantsu
--- a/NyaaPy/nyaa.py
+++ b/NyaaPy/nyaa.py
@@ -0,0 +1,33 @@
+import requests
+from bs4 import BeautifulSoup
+from NyaaPy.utils import Utils as utils
+
+class Nyaa():
+    '''Scraper for the torrent listing pages of nyaa.si.'''
+
+    @staticmethod
+    def search(keyword, category, subcategory, filters, page):
+        '''
+         Return a list of dicts with the results of the query; the list is
+         empty when no row parses. `page` is only sent when greater than 0.
+        '''
+        # Let requests build the query string so that spaces, '&' or '#'
+        # inside the keyword are percent-encoded instead of corrupting it.
+        query = {'f': filters, 'c': "{}_{}".format(category, subcategory), 'q': keyword}
+        if page > 0:
+            query['p'] = page
+
+        r = requests.get("http://nyaa.si/", params=query)
+        rows = BeautifulSoup(r.text, 'html.parser').select('table tr')
+
+        return utils.parse_nyaa(rows, limit=None)
+
+    @staticmethod
+    def news(number_of_results):
+        '''
+         Returns an array of dicts with the n last updates of Nyaa.si
+        '''
+        r = requests.get("http://nyaa.si/")
+        rows = BeautifulSoup(r.text, 'html.parser').select('table tr')
+        # Trim after parsing: rows that yield no torrent must not count.
+        return utils.parse_nyaa(rows, limit=None)[:number_of_results]
--- a/NyaaPy/nyaapantsu.py
+++ b/NyaaPy/nyaapantsu.py
@@ -0,0 +1,42 @@
+import requests
+from bs4 import BeautifulSoup
+from NyaaPy.utils import Utils as utils
+
+
+class NyaaPantsu():
+    '''
+     Client for the RSS feed of nyaa.pantsu.cat.
+     NOTE(review): xmltodict is used below but never imported by this module;
+     `import xmltodict` still has to be added next to the other imports.
+    '''
+
+    @staticmethod
+    def _items(response):
+        # The feed's 'item' entry is a single mapping (not a list) when there
+        # is exactly one result, and the key is absent when there are none.
+        try:
+            items = response['rss']['channel']['item']
+        except (KeyError, TypeError):
+            return []
+        return items if isinstance(items, list) else [items]
+
+    @staticmethod
+    def search(keyword):
+        '''
+         Make a query to nyaa.pantsu.cat using keyword as keyword.
+         Returns a list with one dict-like entry per result of the query.
+         Returns an empty list if no results.
+        '''
+        nyaapantsu_baseurl = "https://nyaa.pantsu.cat/feed?c=_&s=0&max=99999&userID=0"
+
+        # requests appends and percent-encodes `q` onto the existing query.
+        request = requests.get(nyaapantsu_baseurl, params={'q': keyword})
+        return NyaaPantsu._items(xmltodict.parse(request.text))
+
+    @staticmethod
+    def news(n):
+        '''
+         Returns a list with the first n entries of the nyaa.pantsu.cat feed.
+        '''
+        request = requests.get("https://nyaa.pantsu.cat/feed")
+        return NyaaPantsu._items(xmltodict.parse(request.text))[:n]
--- a/NyaaPy/utils.py
+++ b/NyaaPy/utils.py
@@ -0,0 +1,107 @@
+'''
+    Module utils
+'''
+
+class Utils():
+    '''Helpers shared by the scrapers: category naming and row parsing.'''
+
+    # Category id -> display name plus its sub-category ids, matching the
+    # "<id>_<sub>" codes found in nyaa.si category links ("/?c=1_2").
+    CATEGORIES = {
+        "1": {"name": "Anime", "subcats": {
+            "1": "Anime Music Video",
+            "2": "English-translated",
+            "3": "Non-English-translated",
+            "4": "Raw",
+        }},
+        "2": {"name": "Audio", "subcats": {
+            "1": "Lossless",
+            "2": "Lossy",
+        }},
+        "3": {"name": "Literature", "subcats": {
+            "1": "English-translated",
+            "2": "Non-English-translated",
+            "3": "Raw",
+        }},
+        "4": {"name": "Live Action", "subcats": {
+            "1": "English-translated",
+            "2": "Idol/Promotional Video",
+            "3": "Non-English-translated",
+            "4": "Raw",
+        }},
+        "5": {"name": "Pictures", "subcats": {
+            "1": "Graphics",
+            "2": "Photos",
+        }},
+        "6": {"name": "Software", "subcats": {
+            "1": "Applications",
+            "2": "Games",
+        }},
+    }
+
+    @staticmethod
+    def get_categories(b):
+        '''
+         Translate a category link such as "/?c=1_2" into a readable
+         "Category - Subcategory" label, e.g. "Anime - English-translated".
+         Ids missing from CATEGORIES fall back to the raw "<id>_<sub>" code.
+         Raises IndexError when the code contains no "_" separator.
+        '''
+        c = b.replace('/?c=', '')
+        cats = c.split('_')
+
+        cat = cats[0]
+        subcat = cats[1]
+
+        try:
+            entry = Utils.CATEGORIES[cat]
+            return "{} - {}".format(entry['name'], entry['subcats'][subcat])
+        except KeyError:
+            # Unmapped id: hand back the raw code rather than failing.
+            return c
+
+    @staticmethod
+    def parse_nyaa(table_rows, limit):
+        '''
+         Convert the <tr> rows of a nyaa.si listing into torrent dicts.
+         Rows yielding fewer than 11 fields (no <td> cells, say) are skipped.
+         `limit` caps the number of torrents returned; None means no cap.
+        '''
+        torrents = []
+
+        for row in table_rows:
+            block = []
+
+            for td in row.find_all('td'):
+                for link in td.find_all('a'):
+                    href = link.get('href')
+                    # Anchors without href or pointing at comments carry no data.
+                    if href is None or href.endswith('#comments'):
+                        continue
+                    block.append(href)
+                    if link.text.rstrip():
+                        block.append(link.text)
+
+                if td.text.rstrip():
+                    block.append(td.text.rstrip())
+
+            try:
+                torrent = {
+                    'category': Utils.get_categories(block[0]),
+                    'url': "http://nyaa.si{}".format(block[1]),
+                    'name': block[2],
+                    'download_url': "http://nyaa.si{}".format(block[4]),
+                    'magnet': block[5],
+                    'size': block[6],
+                    'date': block[7],
+                    'seeders': block[8],
+                    'leechers': block[9],
+                    'completed_downloads': block[10],
+                }
+            except IndexError:
+                # Not a torrent row: too few fields were collected from it.
+                continue
+            torrents.append(torrent)
+
+        # Cap after parsing so skipped rows never eat into the requested count.
+        return torrents[:limit]
--- a/tests/test.py
+++ b/tests/test.py
@@ -1,18 +1,16 @@
-import json, requests
 from NyaaPy import Nyaa, NyaaPantsu

 # Nyaa.si results
 def nyaa_search():
-    nyaa_query = Nyaa.search('koe no katachi 1080', 1, 0, 0, 0)
+    nyaa_query = Nyaa.search(keyword='koe no katachi 1080', category=1, subcategory=0, filters=0, page=0)

    for nyaa in nyaa_query:
-        print(nyaa['date'])
+        print(nyaa)

 def nyaa_news():
-    news = Nyaa.news(5)
-
-    for result in news:
-        print(result)
+    news = Nyaa.news(number_of_results=5)
+    for n in news:
+        print(n)

 # Nyaa.pantsu.cat results
 def pantsu_search():