Kaynağa Gözat

Add menu and other features

Sergienko Anton 7 yıl önce
ebeveyn
işleme
62e5e6d229
1 değiştirilmiş dosya ile 174 ekleme ve 81 silme
  1. 174 81
      src/program.py

+ 174 - 81
src/program.py

@@ -3,65 +3,189 @@ import json
 import re
 import requests
 import time
+import os
+
+dictionary_filename = 'efremova.txt'
+dictionary_json_filename = 'data.json'
+url = 'https://ru.wiktionary.org/wiki/'
 
 
 def main():
-    dictionary_filename = "efremova.txt"
-    dictionary_json_filename = "data.json"
+    if not is_exist_dictionary():
+        return
+
+    while True:
+        print('')
+        print('1 - Clear all temporary files')
+        print('2 - Generated file {}'.format(dictionary_json_filename))
+        print('3 - How many articles need to check on {}'.format(url))
+        print('4 - Check the words on  {}'.format(dictionary_json_filename))
+        print('10 - Exit')
+
+        command = int(input('Enter command number '))
+        if command == 1:
+            clear_all_temporary_files()
+        if command == 2:
+            generated_json()
+        if command == 3:
+            how_many_articles_need_to_check()
+        if command == 4:
+            check_words_on_site()
+        if command == 10:
+            break
+
+
+def clear_all_temporary_files():
+    start = time.time()
     file = Path(dictionary_json_filename)
     if file.is_file():
-        start = time.time()
-        analysis_dictionary_json_filename_using_wiktionary(dictionary_json_filename)
-        end = time.time()
-        print(end - start)
-        print("Analysis of a dictionary using the https://ru.wiktionary.org ended")
-    else:
-        create_dictionary_json_filename(dictionary_filename, dictionary_json_filename)
+        os.remove(dictionary_json_filename)
+    print('All temporary files deleted')
+    end = time.time()
+    print_time(start, end)
+
 
+def generated_json():
+    start = time.time()
+    if not is_exist_dictionary():
+        return
+
+    file = Path(dictionary_filename)
+    with open(file, encoding='utf8') as f:
+        lines = f.read().splitlines()
+    dictionary = dict()
+    for line in lines:
+        split_line = line.split(' ', 1)
+        word = split_line[0]
+        definition = split_line[1]
 
-def analysis_dictionary_json_filename_using_wiktionary(dictionary_json_filename):
-    dictionary = read_dictionary_json(dictionary_json_filename)
+        is_noun_by_dictionary = False
+        if re.match(r'(ж|м|ср|мн)\.(.*)$', definition) or re.match(r'(1\.|I) (ж|м|ср|мн)\.(.*)$', definition):
+            is_noun_by_dictionary = True
 
+        is_possible_adjective = False
+        if (
+                word.endswith('ая') or
+                word.endswith('ее') or
+                word.endswith('ие') or
+                word.endswith('ий') or
+                word.endswith('ое') or
+                word.endswith('ой') or
+                word.endswith('ые') or
+                word.endswith('ый') or
+                word.endswith('ье') or
+                word.endswith('ьи') or
+                word.endswith('ья') or
+                word.endswith('яя')
+        ):
+            is_possible_adjective = True
+
+        entry = dict()
+        entry['definition'] = definition
+        entry['is_noun_by_dictionary'] = is_noun_by_dictionary
+        entry['is_possible_adjective'] = is_possible_adjective
+        entry['answer_from_wiktionary'] = 'null'
+        dictionary[word] = entry
+
+    save_json(dictionary)
+    end = time.time()
+    print_time(start, end)
+
+
+def how_many_articles_need_to_check():
+    start = time.time()
+    if not is_exist_json():
+        return
+    dictionary = read_json()
+
+    count_all = 0
+    count_nouns_by_dictionary = 0
+    count_check = 0
     for word, entry in dictionary.items():
+        count_all += 1
+        if entry['is_noun_by_dictionary']:
+            count_nouns_by_dictionary += 1
         if (
-                entry["is_noun_by_dictionary"] and
-                entry["is_possible_adjective"] and
-                entry["answer_from_wiktionary"] == "null"
+                entry['is_noun_by_dictionary'] and
+                entry['is_possible_adjective'] and
+                entry['answer_from_wiktionary'] == 'null'
         ):
-            response = requests.get('https://ru.wiktionary.org/wiki/' + word)
-            print('{} status_code = {}'.format(word, response.status_code))
-            if response.status_code == 200:
-                html = response.text
-                is_noun_by_wiktionary = False
-                if "title=\"существительное\">Существительное</a>" in html:
-                    is_noun_by_wiktionary = True
-                if "title=\"выступает в роли существительного\">субстантивир.</span>" in html:
-                    is_noun_by_wiktionary = True
-
-                if is_noun_by_wiktionary:
-                    dictionary[word]["answer_from_wiktionary"] = True
+            count_check += 1
+
+    print('All words: {}'.format(count_all))
+    print('All nouns by dictionary: {}'.format(count_nouns_by_dictionary))
+    print('It remains to check words: {}'.format(count_check))
+    end = time.time()
+    print_time(start, end)
+
+
+def check_words_on_site():
+    start = time.time()
+    if not is_exist_json():
+        return
+    dictionary = read_json()
+
+    i = 0
+    for word, entry in dictionary.items():
+        if (
+                entry['is_noun_by_dictionary'] and
+                entry['is_possible_adjective'] and
+                entry['answer_from_wiktionary'] == 'null'
+        ):
+            try:
+                response = requests.get(url + word)
+                print('{} status_code = {}'.format(word, response.status_code))
+                if response.status_code == 200:
+                    html = response.text
+                    is_noun_by_wiktionary = False
+                    is_adjective_by_wiktionary = False
+                    if 'title="существительное">Существительное</a>' in html:
+                        is_noun_by_wiktionary = True
+                    if 'title="выступает в роли существительного">субстантивир.</span>' in html:
+                        is_noun_by_wiktionary = True
+
+                    if 'title="прилагательное">Прилагательное</a>' in html:
+                        is_adjective_by_wiktionary = True
+
+                    if is_noun_by_wiktionary:
+                        dictionary[word]['answer_from_wiktionary'] = 'noun'
+
+                    if is_adjective_by_wiktionary:
+                        dictionary[word]['answer_from_wiktionary'] = 'adjective'
+
+                    if not is_noun_by_wiktionary and not is_adjective_by_wiktionary:
+                        print('Need more checks')
                 else:
-                    print("is_noun_by_wiktionary = {}".format(is_noun_by_wiktionary))
-            else:
-                dictionary[word]["answer_from_wiktionary"] = response.status_code
-            print("-------------------------")
+                    dictionary[word]['answer_from_wiktionary'] = response.status_code
+                    print('url = {}'.format(response.url))
+                print('-------------------------')
+
+                i += 1
+                if i % 100 == 0:
+                    save_json(dictionary)
+            except ConnectionError:
+                print("Error: ConnectionError")
+                time.sleep(1)
+
+    save_json(dictionary)
+    print('Analysis of a dictionary using the {} ended'.format(url))
+    end = time.time()
+    print_time(start, end)
 
-    save_dictionary_json(dictionary, dictionary_json_filename)
 
+def print_time(start, end):
+    print('Function execution time: {}'.format(end - start))
 
-def save_dictionary_json(dictionary, dictionary_json_filename):
+
+def save_json(dictionary):
     file = Path(dictionary_json_filename)
-    if file.is_file():
-        action_string = " updated"
-    else:
-        action_string = " created"
+    action = 'updated' if file.is_file() else 'created'
     with open(dictionary_json_filename, 'w', encoding='utf8') as outfile:
         json.dump(dictionary, outfile, ensure_ascii=False, indent=4)
-    print('File ' + dictionary_json_filename + action_string)
+    print('File {} {}'.format(dictionary_json_filename, action))
 
 
-def read_dictionary_json(dictionary_json_filename):
-    dictionary = dict()
+def read_json():
     file = Path(dictionary_json_filename)
     with open(file, encoding='utf8') as f:
         dictionary = json.loads(f.read())
@@ -69,49 +193,18 @@ def read_dictionary_json(dictionary_json_filename):
     return dictionary
 
 
-def create_dictionary_json_filename(dictionary_filename, dictionary_json_filename):
+def is_exist_json():
+    file = Path(dictionary_json_filename)
+    if not file.is_file():
+        print('File {} not exists. This file needs to be generated.'.format(dictionary_json_filename))
+    return file.is_file()
+
+
+def is_exist_dictionary():
     file = Path(dictionary_filename)
-    if file.is_file():
-        with open(file, encoding="utf8") as f:
-            lines = f.read().splitlines()
-
-        dictionary = dict()
-        for line in lines:
-            split_line = line.split(" ", 1)
-            word = split_line[0]
-            definition = split_line[1]
-
-            is_noun_by_dictionary = False
-            if re.match(r"(ж|м|ср|мн)\.(.*)$", definition) or re.match(r"(1\.|I) (ж|м|ср|мн)\.(.*)$", definition):
-                is_noun_by_dictionary = True
-
-            is_possible_adjective = False
-            if (
-                    word.endswith("ая") or
-                    word.endswith("ее") or
-                    word.endswith("ие") or
-                    word.endswith("ий") or
-                    word.endswith("ое") or
-                    word.endswith("ой") or
-                    word.endswith("ые") or
-                    word.endswith("ый") or
-                    word.endswith("ье") or
-                    word.endswith("ьи") or
-                    word.endswith("ья") or
-                    word.endswith("яя")
-            ):
-                is_possible_adjective = True
-
-            entry = dict()
-            entry["definition"] = definition
-            entry["is_noun_by_dictionary"] = is_noun_by_dictionary
-            entry["is_possible_adjective"] = is_possible_adjective
-            entry["answer_from_wiktionary"] = "null"
-            dictionary[word] = entry
-
-        save_dictionary_json(dictionary, dictionary_json_filename)
-    else:
-        print('File ' + dictionary_filename + ' not exists')
+    if not file.is_file():
+        print('File {} not exists. The program cannot work.'.format(dictionary_filename))
+    return file.is_file()
 
 
 def test():