diff --git a/Gemfile b/Gemfile
index daab71588b2a70728aacf06ca9f4d4462714fc83..d24dc5f5c43cc9b1854ba58a5a8282747977d7cc 100644
--- a/Gemfile
+++ b/Gemfile
@@ -33,7 +33,7 @@ gem 'bootsnap', '~> 1.4', require: false
 gem 'browser'
 gem 'charlock_holmes', '~> 0.7.7'
 gem 'iso-639'
-gem 'chewy', '~> 5.1'
+gem 'chewy', git: 'https://github.com/noellabo/chewy.git', ref: 'bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0'
 gem 'cld3', '~> 3.2.6'
 gem 'devise', '~> 4.7'
 gem 'devise-two-factor', '~> 3.1'
diff --git a/Gemfile.lock b/Gemfile.lock
index 50bb61d75aed5876940ceb5b230f314f719d7321..f6857f090272bff51af990eb4f3bcc5352ea6ae6 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -6,6 +6,17 @@ GIT
     health_check (4.0.0.pre)
       rails (>= 4.0)
 
+GIT
+  remote: https://github.com/noellabo/chewy.git
+  revision: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0
+  ref: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0
+  specs:
+    chewy (5.1.0)
+      activesupport (>= 4.0)
+      elasticsearch (>= 7.0.0)
+      elasticsearch-dsl
+      faraday (>= 0.17.0)
+
 GIT
   remote: https://github.com/rtomayko/posix-spawn
   revision: 58465d2e213991f8afb13b984854a49fcdcc980c
@@ -155,10 +166,6 @@ GEM
     case_transform (0.2)
       activesupport
     charlock_holmes (0.7.7)
-    chewy (5.1.0)
-      activesupport (>= 4.0)
-      elasticsearch (>= 2.0.0)
-      elasticsearch-dsl
     chunky_png (1.3.11)
     cld3 (3.2.6)
       ffi (>= 1.1.0, < 1.12.0)
@@ -682,7 +689,7 @@ DEPENDENCIES
   capistrano-yarn (~> 2.0)
   capybara (~> 3.30)
   charlock_holmes (~> 0.7.7)
-  chewy (~> 5.1)
+  chewy!
   cld3 (~> 3.2.6)
   climate_control (~> 0.2)
   concurrent-ruby
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 2e0c0ccc398abd7c2359a72d0451e82c419587a1..29eac50eb89d3ae179001f4076525adf48dbb2ff 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -2,31 +2,26 @@
 
 class StatusesIndex < Chewy::Index
   settings index: { refresh_interval: '15m' }, analysis: {
-    filter: {
-      english_stop: {
-        type: 'stop',
-        stopwords: '_english_',
-      },
-      english_stemmer: {
-        type: 'stemmer',
-        language: 'english',
-      },
-      english_possessive_stemmer: {
-        type: 'stemmer',
-        language: 'possessive_english',
+    tokenizer: {
+      sudachi_tokenizer: {
+        type: 'sudachi_tokenizer',
+        mode: 'search',
+        discard_punctuation: true,
+        resources_path: '/etc/elasticsearch',
+        settings_path: '/etc/elasticsearch/sudachi.json',
       },
     },
     analyzer: {
       content: {
-        tokenizer: 'uax_url_email',
         filter: %w(
-          english_possessive_stemmer
           lowercase
-          asciifolding
           cjk_width
-          english_stop
-          english_stemmer
+          sudachi_part_of_speech
+          sudachi_ja_stop
+          sudachi_baseform
         ),
+        tokenizer: 'sudachi_tokenizer',
+        type: 'custom',
       },
     },
   }