Parcourir la source

Experiment with grequests

Sergienko Anton il y a 7 ans
Parent
commit
018b9c0bbe
1 fichier modifié avec 48 ajouts et 36 suppressions
  1. 48 36
      src/process.py

+ 48 - 36
src/process.py

@@ -1,7 +1,9 @@
 from pathlib import Path
 import json
 import re
-import requests
+import urllib.parse
+import grequests
+import time
 
 
 def main():
@@ -9,37 +11,54 @@ def main():
     dictionary_json_filename = "data.json"
     file = Path(dictionary_json_filename)
     if file.is_file():
-        dictionary = read_dictionary_json(dictionary_json_filename)
+        start = time.time()
+        analysis_dictionary_json_filename(dictionary_json_filename)
+        end = time.time()
+        print(end - start)
+    else:
+        create_dictionary_json_filename(dictionary_filename, dictionary_json_filename)
 
-        for word, entry in dictionary.items():
-            if (
-                    entry["is_noun_by_dictionary"] and
-                    entry["is_possible_adjective"] and
-                    entry["answer_from_wiktionary"] == "null"
-            ):
-                response = requests.get('https://ru.wiktionary.org/wiki/' + word)
-                print('{} status_code = {}'.format(word, response.status_code))
-                if response.status_code == 200:
-                    html = response.text
-                    is_noun_by_wiktionary = False
-                    if "title=\"существительное\">Существительное</a>" in html:
-                        is_noun_by_wiktionary = True
-                    if "title=\"выступает в роли существительного\">субстантивир.</span>" in html:
-                        is_noun_by_wiktionary = True
-
-                    if is_noun_by_wiktionary:
-                        dictionary[word]["answer_from_wiktionary"] = True
-                    else:
-                        print("is_noun_by_wiktionary = {}".format(is_noun_by_wiktionary))
+
+def analysis_dictionary_json_filename(dictionary_json_filename):
+    dictionary = read_dictionary_json(dictionary_json_filename)
+
+    urls = []
+    for word, entry in dictionary.items():
+        if (
+                entry["is_noun_by_dictionary"] and
+                entry["is_possible_adjective"] and
+                entry["answer_from_wiktionary"] == "null"
+        ):
+            urls.append('https://ru.wiktionary.org/wiki/' + word)
+
+    requests = (grequests.get(u) for u in urls)
+    responses = grequests.map(requests)
+    for response in responses:
+        if response is not None:
+            word = urllib.parse.unquote(response.url.rsplit("/", 1)[1])
+            print('{} status_code = {}'.format(word, response.status_code))
+
+            if response.status_code == 200:
+                html = response.text
+                is_noun_by_wiktionary = False
+                if "title=\"существительное\">Существительное</a>" in html:
+                    is_noun_by_wiktionary = True
+                if "title=\"выступает в роли существительного\">субстантивир.</span>" in html:
+                    is_noun_by_wiktionary = True
+
+                if is_noun_by_wiktionary:
+                    dictionary[word]["answer_from_wiktionary"] = True
                 else:
-                    dictionary[word]["answer_from_wiktionary"] = response.status_code
+                    print("is_noun_by_wiktionary = {}".format(is_noun_by_wiktionary))
+            else:
+                dictionary[word]["answer_from_wiktionary"] = response.status_code
 
-                # save_dictionary_json(dictionary, dictionary_json_filename)
-                print("-------------------------")
 
-        save_dictionary_json(dictionary, dictionary_json_filename)
-    else:
-        create_dictionary_json_filename(dictionary_filename, dictionary_json_filename)
+        else:
+            print("None")
+        print("-------------------------")
+
+    save_dictionary_json(dictionary, dictionary_json_filename)
 
 
 def save_dictionary_json(dictionary, dictionary_json_filename):
@@ -108,14 +127,7 @@ def create_dictionary_json_filename(dictionary_filename, dictionary_json_filenam
 
 
 def test():
-    # url = 'https://ru.wiktionary.org/wiki/' + quote('длиннокрылые')
-    # f = urllib.request.urlopen(url)
-    # html = f.read().decode('utf-8')
-    response = requests.get('https://ru.wiktionary.org/wiki/длиннокрылые')
-    html = response.text
-    print(response.status_code)
-    with open("output.txt", "w", encoding='utf8') as text_file:
-        text_file.write(html)
+    pass
 
 
 if __name__ == '__main__':