Преглед изворни кода

Add the beginning of the Python script

Sergienko Anton пре 7 година
родитељ
комит
cf84a7e782
9 измењених фајлова са 395 додато и 0 уклоњено
  1. 6 0
      src/.idea/libraries/R_User_Library.xml
  2. 4 0
      src/.idea/misc.xml
  3. 8 0
      src/.idea/modules.xml
  4. 15 0
      src/.idea/src.iml
  5. 6 0
      src/.idea/vcs.xml
  6. 178 0
      src/.idea/workspace.xml
  7. 20 0
      src/data.json
  8. 36 0
      src/efremova.txt
  9. 122 0
      src/process.py

+ 6 - 0
src/.idea/libraries/R_User_Library.xml

@@ -0,0 +1,6 @@
+<component name="libraryTable">
+  <library name="R User Library">
+    <CLASSES />
+    <SOURCES />
+  </library>
+</component>

+ 4 - 0
src/.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (src)" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
src/.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/src.iml" filepath="$PROJECT_DIR$/.idea/src.iml" />
+    </modules>
+  </component>
+</project>

+ 15 - 0
src/.idea/src.iml

@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="R User Library" level="project" />
+    <orderEntry type="library" name="R Skeletons" level="application" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>

+ 6 - 0
src/.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+  </component>
+</project>

+ 178 - 0
src/.idea/workspace.xml

@@ -0,0 +1,178 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="4aa1e78b-2e97-41a2-804a-5410fbf2892c" name="Default Changelist" comment="" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FUSProjectUsageTrigger">
+    <session id="1671358883">
+      <usages-collector id="statistics.lifecycle.project">
+        <counts>
+          <entry key="project.open.time.8" value="1" />
+          <entry key="project.opened" value="1" />
+        </counts>
+      </usages-collector>
+      <usages-collector id="statistics.file.extensions.open">
+        <counts>
+          <entry key="py" value="1" />
+        </counts>
+      </usages-collector>
+      <usages-collector id="statistics.file.types.open">
+        <counts>
+          <entry key="Python" value="1" />
+        </counts>
+      </usages-collector>
+    </session>
+  </component>
+  <component name="FileEditorManager">
+    <leaf>
+      <file pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/process.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="527">
+              <caret line="49" column="34" lean-forward="true" selection-start-line="49" selection-start-column="34" selection-end-line="49" selection-end-column="34" />
+              <folding>
+                <element signature="e#0#24#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/process.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds" extendedState="7">
+    <option name="x" value="260" />
+    <option name="y" value="20" />
+    <option name="width" value="1400" />
+    <option name="height" value="1000" />
+  </component>
+  <component name="ProjectLevelVcsManager" settingsEditedManually="true">
+    <ConfirmationsSetting value="1" id="Add" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="src" type="b2602c69:ProjectViewProjectNode" />
+              <item name="src" type="462c0819:PsiDirectoryNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+      <pane id="Scope" />
+    </panes>
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="RunManager">
+    <configuration name="process" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="src" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/process.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <recent_temporary>
+      <list>
+        <item itemvalue="Python.process" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="4aa1e78b-2e97-41a2-804a-5410fbf2892c" name="Default Changelist" comment="" />
+      <created>1540753744234</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1540753744234</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="-8" y="-8" width="1936" height="1056" extended-state="7" />
+    <editor active="true" />
+    <layout>
+      <window_info id="Favorites" side_tool="true" />
+      <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.13272922" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info anchor="bottom" id="Version Control" show_stripe_button="false" />
+      <window_info anchor="bottom" id="Python Console" />
+      <window_info anchor="bottom" id="Terminal" weight="0.34777898" />
+      <window_info anchor="bottom" id="Event Log" side_tool="true" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info active="true" anchor="bottom" id="Run" order="2" visible="true" weight="0.32936078" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="right" id="R Packages" />
+      <window_info anchor="right" id="R Graphics" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+    </layout>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/process.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="527">
+          <caret line="49" column="34" lean-forward="true" selection-start-line="49" selection-start-column="34" selection-end-line="49" selection-end-column="34" />
+          <folding>
+            <element signature="e#0#24#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>

Разлика између датотека није приказана због своје велике величине
+ 20 - 0
src/data.json


Разлика између датотека није приказана због своје велике величине
+ 36 - 0
src/efremova.txt


+ 122 - 0
src/process.py

@@ -0,0 +1,122 @@
+from pathlib import Path
+import json
+import re
+import requests
+
+
+def main():
+    dictionary_filename = "efremova.txt"
+    dictionary_json_filename = "data.json"
+    file = Path(dictionary_json_filename)
+    if file.is_file():
+        dictionary = read_dictionary_json(dictionary_json_filename)
+
+        for word, entry in dictionary.items():
+            if (
+                    entry["is_noun_by_dictionary"] and
+                    entry["is_possible_adjective"] and
+                    entry["answer_from_wiktionary"] == "null"
+            ):
+                response = requests.get('https://ru.wiktionary.org/wiki/' + word)
+                print('{} status_code = {}'.format(word, response.status_code))
+                if response.status_code == 200:
+                    html = response.text
+                    is_noun_by_wiktionary = False
+                    if "title=\"существительное\">Существительное</a>" in html:
+                        is_noun_by_wiktionary = True
+                    if "title=\"выступает в роли существительного\">субстантивир.</span>" in html:
+                        is_noun_by_wiktionary = True
+
+                    if is_noun_by_wiktionary:
+                        dictionary[word]["answer_from_wiktionary"] = True
+                    else:
+                        print("is_noun_by_wiktionary = {}".format(is_noun_by_wiktionary))
+                else:
+                    dictionary[word]["answer_from_wiktionary"] = response.status_code
+
+                save_dictionary_json(dictionary, dictionary_json_filename)
+                print("-------------------------")
+
+        #save_dictionary_json(dictionary, dictionary_json_filename)
+    else:
+        create_dictionary_json_filename(dictionary_filename, dictionary_json_filename)
+
+
+def save_dictionary_json(dictionary, dictionary_json_filename):
+    file = Path(dictionary_json_filename)
+    if file.is_file():
+        action_string = " updated"
+    else:
+        action_string = " created"
+    with open(dictionary_json_filename, 'w', encoding='utf8') as outfile:
+        json.dump(dictionary, outfile, ensure_ascii=False, indent=4)
+    print('File ' + dictionary_json_filename + action_string)
+
+
+def read_dictionary_json(dictionary_json_filename):
+    dictionary = dict()
+    file = Path(dictionary_json_filename)
+    with open(file, encoding='utf8') as f:
+        dictionary = json.loads(f.read())
+    print('File ' + dictionary_json_filename + ' opened')
+    return dictionary
+
+
+def create_dictionary_json_filename(dictionary_filename, dictionary_json_filename):
+    file = Path(dictionary_filename)
+    if file.is_file():
+        with open(file, encoding="utf8") as f:
+            lines = f.read().splitlines()
+
+        dictionary = dict()
+        for line in lines:
+            split_line = line.split(" ", 1)
+            word = split_line[0]
+            definition = split_line[1]
+
+            is_noun_by_dictionary = False
+            if re.match(r"(ж|м|ср|мн)\.(.*)$", definition) or re.match(r"(1\.|I) (ж|м|ср|мн)\.(.*)$", definition):
+                is_noun_by_dictionary = True
+
+            is_possible_adjective = False
+            if (
+                    word.endswith("ая") or
+                    word.endswith("ее") or
+                    word.endswith("ие") or
+                    word.endswith("ий") or
+                    word.endswith("ое") or
+                    word.endswith("ой") or
+                    word.endswith("ые") or
+                    word.endswith("ый") or
+                    word.endswith("ье") or
+                    word.endswith("ьи") or
+                    word.endswith("ья") or
+                    word.endswith("яя")
+            ):
+                is_possible_adjective = True
+
+            entry = dict()
+            entry["definition"] = definition
+            entry["is_noun_by_dictionary"] = is_noun_by_dictionary
+            entry["is_possible_adjective"] = is_possible_adjective
+            entry["answer_from_wiktionary"] = "null"
+            dictionary[word] = entry
+
+        save_dictionary_json(dictionary, dictionary_json_filename)
+    else:
+        print('File ' + dictionary_filename + ' not exists')
+
+
+def test():
+    # url = 'https://ru.wiktionary.org/wiki/' + quote('длиннокрылые')
+    # f = urllib.request.urlopen(url)
+    # html = f.read().decode('utf-8')
+    response = requests.get('https://ru.wiktionary.org/wiki/длиннокрылые')
+    html = response.text
+    print(response.status_code)
+    with open("output.txt", "w", encoding='utf8') as text_file:
+        text_file.write(html)
+
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()

Неке датотеке нису приказане због велике количине промена