{"id":2700,"date":"2021-03-28T01:07:48","date_gmt":"2021-03-27T16:07:48","guid":{"rendered":"https:\/\/obenkyolab.com\/?p=2700"},"modified":"2022-01-29T00:17:27","modified_gmt":"2022-01-28T15:17:27","slug":"%e3%80%90python%e3%80%91doc2vec%e3%81%a7%e9%a1%9e%e4%bc%bc%e6%96%87%e6%9b%b8%e3%81%ae%e8%a1%a8%e7%a4%ba","status":"publish","type":"post","link":"https:\/\/obenkyolab.com\/?p=2700","title":{"rendered":"\u3010Python\u3011Doc2Vec\u3067\u985e\u4f3c\u6587\u66f8\u306e\u8868\u793a"},"content":{"rendered":"\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_80 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">Table of Contents<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a 
class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E6%A6%82%E8%A6%81\" >\u6982\u8981<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E3%82%BF%E3%82%B9%E3%82%AF%E8%A8%AD%E5%AE%9A\" >\u30bf\u30b9\u30af\u8a2d\u5b9a<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E4%BD%BF%E7%94%A8%E3%81%99%E3%82%8B%E8%AB%B8%E3%80%85\" >\u4f7f\u7528\u3059\u308b\u8af8\u3005<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#Step1%EF%BC%9A%E3%83%87%E3%83%BC%E3%82%BF%E5%8F%96%E5%BE%97\" >Step1\uff1a\u30c7\u30fc\u30bf\u53d6\u5f97<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#Step2%EF%BC%9Agensim%E3%81%AE%E3%82%A4%E3%83%B3%E3%82%B9%E3%83%88%E3%83%BC%E3%83%AB\" >Step2\uff1agensim\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#Step3%EF%BC%9A%E3%83%A2%E3%83%87%E3%83%AB%E4%BD%9C%E6%88%90\" >Step3\uff1a\u30e2\u30c7\u30eb\u4f5c\u6210<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E3%83%87%E3%83%BC%E3%82%BF%E8%AA%AD%E3%81%BF%E8%BE%BC%E3%81%BF\" >\u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E5%BD%A2%E6%85%8B%E7%B4%A0%E8%A7%A3%E6%9E%90\" >\u5f62\u614b\u7d20\u89e3\u6790<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" 
href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E5%AD%A6%E7%BF%92%E7%94%A8%E3%83%87%E3%83%BC%E3%82%BF%E6%BA%96%E5%82%99\" >\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u6e96\u5099<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E3%83%A2%E3%83%87%E3%83%AB%E4%BD%9C%E6%88%90\" >\u30e2\u30c7\u30eb\u4f5c\u6210<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#Step4%EF%BC%9A%E8%A9%95%E4%BE%A1%EF%BC%88%E5%AE%9A%E6%80%A7%EF%BC%89\" >Step4\uff1a\u8a55\u4fa1\uff08\u5b9a\u6027\uff09<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/obenkyolab.com\/?p=2700\/#%E5%8F%82%E8%80%83%E3%81%AB%E3%81%95%E3%81%9B%E3%81%A6%E9%A0%82%E3%81%84%E3%81%9F%E3%82%B5%E3%82%A4%E3%83%88\" >\u53c2\u8003\u306b\u3055\u305b\u3066\u9802\u3044\u305f\u30b5\u30a4\u30c8<\/a><\/li><\/ul><\/li><\/ul><\/nav><\/div>\n<h2 class=\"wp-block-heading\" id=\"\u6982\u8981\"><span class=\"ez-toc-section\" id=\"%E6%A6%82%E8%A6%81\"><\/span>\u6982\u8981<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5206\u91ce\u306b\u304a\u3044\u3066\u5358\u8a9e\u306e\u5206\u6563\u8868\u73fe\u3092\u7372\u5f97\u3059\u308b\u624b\u6cd5\u3068\u3057\u3066Word2Vec\u304c\u3042\u308a\u307e\u3059\u304c\u3001\u305d\u306e\u6587\u7ae0\u7248\u3001\u3064\u307e\u308a\u6587\u7ae0\u3092\u5206\u6563\u8868\u73fe\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u624b\u6cd5\u3068\u3057\u3066Doc2Vec\u304c\u3042\u308a\u307e\u3059\u3002\u4eca\u56de\u306fPython\u3067Doc2Vec\u306e\u4f7f\u3044\u65b9\u306b\u3064\u3044\u3066\u52c9\u5f37\u3057\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"\u30bf\u30b9\u30af\u8a2d\u5b9a\"><span class=\"ez-toc-section\" 
id=\"%E3%82%BF%E3%82%B9%E3%82%AF%E8%A8%AD%E5%AE%9A\"><\/span>\u30bf\u30b9\u30af\u8a2d\u5b9a<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u6587\u7ae0\u7fa4\u3092Doc2Vec\u3067\u30d9\u30af\u30c8\u30eb\u5316\u3057\u3001\u305d\u306e\u306a\u304b\u306e\u4e00\u3064\u306e\u6587\u7ae0\u3092\u9078\u3073\u3001\u305d\u308c\u3068\u985e\u4f3c\u5ea6\u306e\u9ad8\u3044\u6587\u66f8\u3092\u6587\u66f8\u7fa4\u306e\u4e2d\u304b\u3089\u9078\u3093\u3067\u8868\u793a\u3059\u308b\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"\u4f7f\u7528\u3059\u308b\u8af8\u3005\"><span class=\"ez-toc-section\" id=\"%E4%BD%BF%E7%94%A8%E3%81%99%E3%82%8B%E8%AB%B8%E3%80%85\"><\/span>\u4f7f\u7528\u3059\u308b\u8af8\u3005<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<ul class=\"wp-block-list\"><li>MeCab 0.996<\/li><li>gensim<\/li><li>livedoor News<\/li><\/ul>\n\n\n\n<p>MeCab\u306f\u4ee5\u4e0b\u3067\u69cb\u7bc9\u3057\u305f\u3082\u306e\u3092\u4f7f\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed is-provider-it-learning wp-block-embed-it-learning\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"hH5MiwyvP7\"><a href=\"https:\/\/obenkyolab.com\/?p=2682\">Windows10\u74b0\u5883\u306ePython3\u3067MeCab\u3092\u4f7f\u3048\u308b\u3088\u3046\u306b\u3059\u308b\u307e\u3067<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; clip: rect(1px, 1px, 1px, 1px);\" title=\"&#8220;Windows10\u74b0\u5883\u306ePython3\u3067MeCab\u3092\u4f7f\u3048\u308b\u3088\u3046\u306b\u3059\u308b\u307e\u3067&#8221; &#8212; Obenkyolab\" src=\"https:\/\/obenkyolab.com\/?p=2682&#038;embed=true#?secret=H4fsj2Lb5c#?secret=hH5MiwyvP7\" data-secret=\"hH5MiwyvP7\" width=\"500\" height=\"282\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\" 
id=\"step1-\u30c7\u30fc\u30bf\u53d6\u5f97\"><span class=\"ez-toc-section\" id=\"Step1%EF%BC%9A%E3%83%87%E3%83%BC%E3%82%BF%E5%8F%96%E5%BE%97\"><\/span>Step1\uff1a\u30c7\u30fc\u30bf\u53d6\u5f97<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4eca\u56de\u306f\u4ee5\u4e0b\u304b\u3089livedoor News\u30b3\u30fc\u30d1\u30b9\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u4f7f\u7528\u3055\u305b\u3066\u9802\u304d\u307e\u3057\u305f\u3002\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059\u3002<br>livedoor \u30cb\u30e5\u30fc\u30b9\u30b3\u30fc\u30d1\u30b9\uff1a<a href=\"https:\/\/www.rondhuit.com\/download.html#ldcc\">https:\/\/www.rondhuit.com\/download.html#ldcc<\/a><\/p>\n\n\n\n<p>ldcc-20140209.tar.gz\u3092\u30b5\u30a4\u30c8\u304b\u3089\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u30577zip\u3067\u5c55\u958b\u3057\u307e\u3057\u305f\u3002<br>\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u69cb\u9020\u3068\u3057\u3066\u306f\u3053\u306e\u3088\u3046\u306a\u5f62\u306b\u306a\u3063\u3066\u3044\u3066\u3001\u5404\u30d5\u30a9\u30eb\u30c0\u306e\u4e2d\u306b.txt\u306e\u5f62\u5f0f\u3067\u30b3\u30fc\u30d1\u30b9\u304c\u683c\u7d0d\u3055\u308c\u3066\u3044\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p>text<br>|- dokujo-tsushin<br>|- it-life-hack<br>|- <strong>kaden-channel<\/strong><br>|- livedoor-homme<br>|- movie-enter<br>|- peachy<br>|- smax<br>|- <strong>sports-watch<\/strong><br>|- topic-news<br>|- CHANGES.txt<br>|- README.txt<\/p>\n\n\n\n<p>\u4eca\u56de\u306fkaden-channel\u3068sports-watch\u30c7\u30fc\u30bf\u3092\u4f7f\u7528\u3059\u308b\u3053\u3068\u306b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"step2-gensim\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\"><span class=\"ez-toc-section\" id=\"Step2%EF%BC%9Agensim%E3%81%AE%E3%82%A4%E3%83%B3%E3%82%B9%E3%83%88%E3%83%BC%E3%83%AB\"><\/span>Step2\uff1agensim\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n<div class=\"wp-block-syntaxhighlighter-code 
\"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\npip install gensim\n<\/pre><\/div>\n\n\n<p><\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"step3-\u30e2\u30c7\u30eb\u4f5c\u6210\"><span class=\"ez-toc-section\" id=\"Step3%EF%BC%9A%E3%83%A2%E3%83%87%E3%83%AB%E4%BD%9C%E6%88%90\"><\/span>Step3\uff1a\u30e2\u30c7\u30eb\u4f5c\u6210<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"\u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f\"><span class=\"ez-toc-section\" id=\"%E3%83%87%E3%83%BC%E3%82%BF%E8%AA%AD%E3%81%BF%E8%BE%BC%E3%81%BF\"><\/span>\u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u307e\u305a\u306f\u5c55\u958b\u3057\u305f\u30c7\u30fc\u30bf\u3092glob\u3067\u8aad\u307f\u8fbc\u307f\u307e\u3059\u3002kaden-channel\u3068sports-watch\u306e\u307f\u306b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>import os\nfrom glob import glob\nos.chdir(&#39;[\u5c55\u958b\u3057\u305f\u30c7\u30a3\u30ec\u30af\u30c8\u30ea]\\\\text&#39;)\n\ntext = []\n#\u5bb6\u96fb\u30c1\u30e3\u30f3\u30cd\u30eb\u8aad\u307f\u8fbc\u307f\nfor file in glob(&#39;kaden-channel\\\\*.txt&#39;):\n    with open(file,encoding=&quot;utf-8&quot;) as f:        \n        text.append([f.read()])\n\nl_kaden = len(text)\nprint(&#39;\u5bb6\u96fb\u30c1\u30e3\u30f3\u30cd\u30eb\u8a18\u4e8b\u6570\uff1a&#39;,l_kaden)\n        \n#\u30b9\u30dd\u30fc\u30c4\u30a6\u30a9\u30c3\u30c1\u8aad\u307f\u8fbc\u307f\nfor file in glob(&#39;sports-watch\\\\*.txt&#39;):\n    with open(file,encoding=&quot;utf-8&quot;) as f:        \n        text.append([f.read()])     \n\nprint(&#39;\u30b9\u30dd\u30fc\u30c4\u30c1\u30e3\u30f3\u30cd\u30eb\u8a18\u4e8b\u6570\uff1a&#39;,len(text)-l_kaden)<\/code><\/pre><\/div>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" 
title=\"\">\n\u5bb6\u96fb\u30c1\u30e3\u30f3\u30cd\u30eb\u8a18\u4e8b\u6570\uff1a 865\n\u30b9\u30dd\u30fc\u30c4\u30c1\u30e3\u30f3\u30cd\u30eb\u8a18\u4e8b\u6570\uff1a 901\n<\/pre><\/div>\n\n\n<h3 class=\"wp-block-heading\" id=\"\u5f62\u614b\u7d20\u89e3\u6790\"><span class=\"ez-toc-section\" id=\"%E5%BD%A2%E6%85%8B%E7%B4%A0%E8%A7%A3%E6%9E%90\"><\/span>\u5f62\u614b\u7d20\u89e3\u6790<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>gensim\u306eDoc2Vec\u306b\u6587\u7ae0\u3092\u6e21\u3059\u305f\u3081\u306b\u306f\u5f62\u614b\u7d20\u89e3\u6790\u3092\u3057\u3066\u304a\u304f\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<br>\u4ee5\u4e0b\u306e\u3088\u3046\u306bMeCab\u306eparse\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u7528\u3057\u3066\u5206\u304b\u3061\u66f8\u304d\u3057\u305f\u4e0a\u3067\u30ea\u30b9\u30c8\u306b\u683c\u7d0d\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-file=\"keitaiso\" data-lang=\"Python\"><code>import MeCab\nm = MeCab.Tagger(&quot;-Ochasen&quot;)\ntext_wakati= []\nfor w in text:\n    text_wakati.append([d.split()[0] for d in m.parse(w[0]).splitlines()])<\/code><\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u6e96\u5099\"><span class=\"ez-toc-section\" id=\"%E5%AD%A6%E7%BF%92%E7%94%A8%E3%83%87%E3%83%BC%E3%82%BF%E6%BA%96%E5%82%99\"><\/span>\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u6e96\u5099<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u6b21\u306bDoc2Vec\u30af\u30e9\u30b9\u306b\u98df\u308f\u305b\u308b\u305f\u3081\u306e\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u4f5c\u3063\u3066\u3044\u304d\u307e\u3059\u3002gensim\u306eTaggedDocument\u30af\u30e9\u30b9\u306b\u5206\u304b\u3061\u66f8\u304d\u3057\u305f\u30c7\u30fc\u30bf\u30ea\u30b9\u30c8\u3068\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u756a\u53f7\u3092\u6e21\u3057\u3001\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>from gensim.models.doc2vec import TaggedDocument\ncnt = 0\ndoc_train = []\nfor words in text_wakati:\n    doc_train.append(TaggedDocument(words,[cnt]))\n    cnt += 1<\/code><\/pre><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"\u30e2\u30c7\u30eb\u4f5c\u6210\"><span class=\"ez-toc-section\" id=\"%E3%83%A2%E3%83%87%E3%83%AB%E4%BD%9C%E6%88%90\"><\/span>\u30e2\u30c7\u30eb\u4f5c\u6210<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4f5c\u3063\u305fdoc_train\u30ea\u30b9\u30c8\u3092Doc2Vec\u306b\u6e21\u3057\u3001\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>from gensim.models.doc2vec import Doc2Vec\nmodel = Doc2Vec(doc_train,dm=1, size=200, min_count=10, epochs=20)<\/code><\/pre><\/div>\n\n\n\n<p>\u30d1\u30e9\u30e1\u30fc\u30bf\u8aac\u660e\uff08\u3084\u3063\u3066\u307f\u305f\u611f\u3058\u30a8\u30dd\u30c3\u30af\u6570\u306f\u5c11\u306a\u3059\u304e\u308b\u3068\u3042\u307e\u308a\u7cbe\u5ea6\u304c\u4e0a\u304c\u3089\u306a\u3044\u5370\u8c61\u3067\u3057\u305f\u3002\uff09<\/p>\n\n\n\n<ul 
class=\"wp-block-list\"><li>dm=1\u3067PV-DM<\/li><li>size\uff1a\u30d9\u30af\u30c8\u30eb\u8868\u73fe\u3059\u308b\u3068\u304d\u306e\u6b21\u5143\u30b5\u30a4\u30ba<\/li><li>min_count\uff1a\u7121\u8996\u3059\u308b\u51fa\u73fe\u56de\u6570\u95be\u5024<\/li><li>epochs\uff1a\u30a8\u30dd\u30c3\u30af\u6570<\/li><\/ul>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"step4-\u8a55\u4fa1-\u5b9a\u6027\"><span class=\"ez-toc-section\" id=\"Step4%EF%BC%9A%E8%A9%95%E4%BE%A1%EF%BC%88%E5%AE%9A%E6%80%A7%EF%BC%89\"><\/span>Step4\uff1a\u8a55\u4fa1\uff08\u5b9a\u6027\uff09<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8a18\u4e8b\u306e\u4e2d\u304b\u3089\u9069\u5f53\u306b\u4e00\u3064\u9078\u3093\u3067\u3001\u305d\u308c\u3068\u985e\u4f3c\u3059\u308b\u4e0a\u4f4d\uff15\u4ef6\u306e\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306e\u8a18\u4e8b\u3092\u8868\u793a\u3057\u3066\u3001\u4eba\u9593\u304c\u898b\u305f\u6642\u306b\u4f3c\u305f\u3088\u3046\u306a\u8a18\u4e8b\u304c\u9078\u3070\u308c\u3066\u3044\u308b\u306e\u304b\u3092\u8a55\u4fa1\u3057\u3066\u307f\u307e\u3059\u3002<br>\u4e0b\u8a18\u306e\u30b3\u30fc\u30c9\u3067index\u306e\u6570\u5b57\u3092\u5909\u3048\u3066\u4efb\u610f\u306e\u8a18\u4e8b\u3092\u9078\u3073\u307e\u3059\u3002\u4e0a\u304b\u3089865\u8a18\u4e8b\u306f\u5bb6\u96fb\u30c1\u30e3\u30f3\u30cd\u30eb\u306a\u306e\u3067\u3001\u4ee5\u4e0b\u306eindex=1500\u306e\u4f8b\u306f\u30b9\u30dd\u30fc\u30c4\u30a6\u30a9\u30c3\u30c1\u306e\u8a18\u4e8b\u304b\u3089\u9078\u3093\u3067\u3044\u308b\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>index = 1500\nsims = model.docvecs.most_similar(index)\nsims\n\n------\n(1492, 0.7714306712150574),\n (1398, 0.6727927923202515),\n (1753, 0.6568506360054016),\n (1501, 0.6562802791595459),\n (1546, 0.6521874666213989),\n (1560, 0.6450892686843872),\n (1073, 0.6395328044891357),\n (1106, 0.6377852559089661),\n (1732, 0.6305040121078491),\n (1585, 
0.630274772644043)<\/code><\/pre><\/div>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>print(text[index],&#39;\\n&#39;)\nprint(text[1492],&#39;\\n&#39;)<\/code><\/pre><\/div>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"700\" height=\"119\" src=\"https:\/\/obenkyolab.com\/wp-content\/uploads\/2021\/03\/image-17-700x119.png\" alt=\"\" class=\"wp-image-2718\" srcset=\"https:\/\/obenkyolab.com\/wp-content\/uploads\/2021\/03\/image-17-700x119.png 700w, https:\/\/obenkyolab.com\/wp-content\/uploads\/2021\/03\/image-17-300x51.png 300w, https:\/\/obenkyolab.com\/wp-content\/uploads\/2021\/03\/image-17-768x130.png 768w, https:\/\/obenkyolab.com\/wp-content\/uploads\/2021\/03\/image-17.png 1303w\" sizes=\"auto, (max-width: 700px) 100vw, 700px\" \/><\/figure>\n\n\n\n<p>index 1500\u306e\u8a18\u4e8b\u306f\u30c6\u30cb\u30b9\u306e\u9326\u7e54\u572d\u9078\u624b\u95a2\u9023\u306e\u8a18\u4e8b\u3067\u3001\u985e\u4f3c\u5ea6\u30c8\u30c3\u30d7\u3067\u4e0a\u304c\u3063\u3066\u304d\u305findex 
1492\u306e\u8a18\u4e8b\u3082\u540c\u3058\u304f\u30c6\u30cb\u30b9\u306e\u9326\u7e54\u572d\u9078\u624b\u306e\u8a71\u984c\u304c\u542b\u307e\u308c\u308b\u8a18\u4e8b\u3067\u3057\u305f\u3002\u5927\u3057\u305f\u524d\u51e6\u7406\u3082\u305b\u305a\u306b\u3053\u3093\u306a\u7c21\u5358\u306b\u985e\u4f3c\u6587\u66f8\u304c\u3067\u3066\u304f\u308b\u306e\u306f\u3059\u3054\u3044\u3067\u3059\u306d\u3002\u307e\u305f\u3001\u5c11\u306a\u304f\u3068\u3082\u5bb6\u96fb\u30c1\u30e3\u30f3\u30cd\u30eb\u304b\u3089\u306f\u4e00\u3064\u3082\u9078\u3070\u308c\u3066\u3044\u306a\u3044\u305f\u3081\u3001\u305d\u308c\u306a\u308a\u306e\u5b66\u7fd2\u306f\u3067\u304d\u3066\u3044\u308b\u69d8\u5b50\u3067\u3059\u3002<\/p>\n\n\n\n<p><br>\u3061\u306a\u307f\u306b\u4e0a\u8a18\u3067\u9078\u3070\u308c\u305f\u8a18\u4e8b\u306e\u4e2d\u306b\u306f\u5973\u5b50\u30b5\u30c3\u30ab\u30fc\u306e\u8a71\u984c\u306a\u3069\u3082\u542b\u307e\u308c\u3066\u3044\u305f\u306e\u3067\u3001\u3082\u3046\u5c11\u3057\u524d\u51e6\u7406\u3084\u30d1\u30e9\u30e1\u30fc\u30bf\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u8ffd\u3044\u8fbc\u3093\u3067\u307f\u3066\u3082\u3044\u3044\u306e\u304b\u3082\u3068\u601d\u3044\u307e\u3057\u305f\u3002\u4e0a\u3067\u3082\u66f8\u3044\u3066\u3044\u307e\u3059\u304c\u3001epoch\u6570\u3092\u5897\u3084\u3059\u3068\u3084\u306f\u308a\u7cbe\u5ea6\u304c\u4e0a\u304c\u3063\u3066\u304f\u308b\u611f\u3058\u304c\u3042\u308a\u307e\u3057\u305f\u3002\u7aef\u672b\u306e\u30b9\u30da\u30c3\u30af\u306b\u4f59\u88d5\u304c\u3042\u308b\u5834\u5408\u306f\u3082\u3063\u3068epoch\u6570\u3092\u5897\u3084\u3057\u3066\u3082\u3044\u3044\u306e\u304b\u3082\u3057\u308c\u306a\u3044\u3067\u3059\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"\u53c2\u8003\u306b\u3055\u305b\u3066\u9802\u3044\u305f\u30b5\u30a4\u30c8\"><span class=\"ez-toc-section\" id=\"%E5%8F%82%E8%80%83%E3%81%AB%E3%81%95%E3%81%9B%E3%81%A6%E9%A0%82%E3%81%84%E3%81%9F%E3%82%B5%E3%82%A4%E3%83%88\"><\/span>\u53c2\u8003\u306b\u3055\u305b\u3066\u9802\u3044\u305f\u30b5\u30a4\u30c8<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u985e\u4f3c\u691c\u7d22\u306a\u3069\uff1a<a href=\"https:\/\/qiita.com\/g-k\/items\/5ea94c13281f675302ca\">Doc2Vec\u306b\u3064\u3044\u3066\u307e\u3068\u3081\u308b<\/a><\/p>\n\n\n\n<p>\u30d1\u30e9\u30e1\u30fc\u30bf\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u306a\u3069\uff1a<a href=\"https:\/\/buildersbox.corp-sansan.com\/entry\/2019\/04\/10\/110000\">Doc2Vec\u306b\u3088\u308b\u6587\u66f8\u30d9\u30af\u30c8\u30eb\u63a8\u8ad6\u306e\u5b89\u5b9a\u5316\u306b\u3064\u3044\u3066<\/a><\/p>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6982\u8981 \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5206\u91ce\u306b\u304a\u3044\u3066\u5358\u8a9e\u306e\u5206\u6563\u8868\u73fe\u3092\u7372\u5f97\u3059\u308b\u624b\u6cd5\u3068\u3057\u3066Wo&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":1218,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_locale":"ja","_original_post":"https:\/\/obenkyolab.com\/?p=2700","footnotes":""},"categories":[7,53],"tags":[],"class_list":["post-2700","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","category-53","ja"],"_links":{"self":[{"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/posts\/2700","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2700"}],"version-history":[{"count":23,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/posts\/2700\/revisions"}],"predecessor-version":[{"id":4722,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/posts\/2700\/revi
sions\/4722"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=\/wp\/v2\/media\/1218"}],"wp:attachment":[{"href":"https:\/\/obenkyolab.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2700"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2700"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/obenkyolab.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2700"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}