From ceabee16849bed312d6955d2a26f07e29c852e17 Mon Sep 17 00:00:00 2001 From: noellabo <noel.yoshiba@gmail.com> Date: Fri, 28 Jun 2019 05:50:16 +0900 Subject: [PATCH] Set up elasticsearch 7 to use sudachi --- Gemfile | 2 +- Gemfile.lock | 17 ++++++++++++----- app/chewy/statuses_index.rb | 29 ++++++++++++----------------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Gemfile b/Gemfile index daab71588b..d24dc5f5c4 100644 --- a/Gemfile +++ b/Gemfile @@ -33,7 +33,7 @@ gem 'bootsnap', '~> 1.4', require: false gem 'browser' gem 'charlock_holmes', '~> 0.7.7' gem 'iso-639' -gem 'chewy', '~> 5.1' +gem 'chewy', git: 'https://github.com/noellabo/chewy.git', ref: 'bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0' gem 'cld3', '~> 3.2.6' gem 'devise', '~> 4.7' gem 'devise-two-factor', '~> 3.1' diff --git a/Gemfile.lock b/Gemfile.lock index 50bb61d75a..f6857f0902 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,6 +6,17 @@ GIT health_check (4.0.0.pre) rails (>= 4.0) +GIT + remote: https://github.com/noellabo/chewy.git + revision: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0 + ref: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0 + specs: + chewy (5.1.0) + activesupport (>= 4.0) + elasticsearch (>= 7.0.0) + elasticsearch-dsl + faraday (>= 0.17.0) + GIT remote: https://github.com/rtomayko/posix-spawn revision: 58465d2e213991f8afb13b984854a49fcdcc980c @@ -155,10 +166,6 @@ GEM case_transform (0.2) activesupport charlock_holmes (0.7.7) - chewy (5.1.0) - activesupport (>= 4.0) - elasticsearch (>= 2.0.0) - elasticsearch-dsl chunky_png (1.3.11) cld3 (3.2.6) ffi (>= 1.1.0, < 1.12.0) @@ -682,7 +689,7 @@ DEPENDENCIES capistrano-yarn (~> 2.0) capybara (~> 3.30) charlock_holmes (~> 0.7.7) - chewy (~> 5.1) + chewy! cld3 (~> 3.2.6) climate_control (~> 0.2) concurrent-ruby diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 2e0c0ccc39..29eac50eb8 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -2,31 +2,26 @@ class StatusesIndex < Chewy::Index settings index: { refresh_interval: '15m' }, analysis: { - filter: { - english_stop: { - type: 'stop', - stopwords: '_english_', - }, - english_stemmer: { - type: 'stemmer', - language: 'english', - }, - english_possessive_stemmer: { - type: 'stemmer', - language: 'possessive_english', + tokenizer: { + sudachi_tokenizer: { + type: 'sudachi_tokenizer', + mode: 'search', + discard_punctuation: true, + resources_path: '/etc/elasticsearch', + settings_path: '/etc/elasticsearch/sudachi.json', }, }, analyzer: { content: { - tokenizer: 'uax_url_email', filter: %w( - english_possessive_stemmer lowercase - asciifolding cjk_width - english_stop - english_stemmer + sudachi_part_of_speech + sudachi_ja_stop + sudachi_baseform ), + tokenizer: 'sudachi_tokenizer', + type: 'custom', }, }, } -- GitLab