diff --git a/Gemfile b/Gemfile index 40d6a2dabaa219a90bc3a6411abbec4a9bb5c4d8..27f91d38e81b6de9dc3623f1c6d77d5a97cd091b 100644 --- a/Gemfile +++ b/Gemfile @@ -34,7 +34,7 @@ gem 'bootsnap', '~> 1.4', require: false gem 'browser' gem 'charlock_holmes', '~> 0.7.7' gem 'iso-639' -gem 'chewy', git: 'https://github.com/noellabo/chewy.git', ref: 'bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0' +gem 'chewy', '~> 5.1' gem 'cld3', '~> 3.2.6' gem 'devise', '~> 4.7' gem 'devise-two-factor', '~> 3.1' diff --git a/Gemfile.lock b/Gemfile.lock index c00609671b153d72f488f996b490e28ecb3c4b64..5e4b63e7dec2394e02ac80712e4df0f48e9c1763 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,23 +6,6 @@ GIT health_check (4.0.0.pre) rails (>= 4.0) -GIT - remote: https://github.com/noellabo/chewy.git - revision: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0 - ref: bce53720beaab36d0cb8cfbd0bc98fb0e44ddaf0 - specs: - chewy (5.1.0) - activesupport (>= 4.0) - elasticsearch (>= 7.0.0) - elasticsearch-dsl - faraday (>= 0.17.0) - -GIT - remote: https://github.com/rack/rack.git - revision: 4ebd70b243d79cecda1ba55abce8e2ead78395d7 - specs: - rack (2.2.0) - GIT remote: https://github.com/rtomayko/posix-spawn revision: 58465d2e213991f8afb13b984854a49fcdcc980c @@ -173,6 +156,10 @@ GEM case_transform (0.2) activesupport charlock_holmes (0.7.7) + chewy (5.1.0) + activesupport (>= 4.0) + elasticsearch (>= 2.0.0) + elasticsearch-dsl chunky_png (1.3.11) cld3 (3.2.6) ffi (>= 1.1.0, < 1.12.0) @@ -719,7 +706,7 @@ DEPENDENCIES capistrano-yarn (~> 2.0) capybara (~> 3.30) charlock_holmes (~> 0.7.7) - chewy! + chewy (~> 5.1) cld3 (~> 3.2.6) climate_control (~> 0.2) concurrent-ruby diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 70865ac8774f2fc3fff8b6fe92377d6aa5c37d23..215e1f82a27514fdc8d09ce83b97629c5d89bc07 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -2,26 +2,31 @@ class StatusesIndex < Chewy::Index settings index: { refresh_interval: '15m' }, analysis: { - tokenizer: { - sudachi_tokenizer: { - type: 'sudachi_tokenizer', - mode: 'search', - discard_punctuation: true, - resources_path: '/etc/elasticsearch', - settings_path: '/etc/elasticsearch/sudachi.json', + filter: { + english_stop: { + type: 'stop', + stopwords: '_english_', + }, + english_stemmer: { + type: 'stemmer', + language: 'english', + }, + english_possessive_stemmer: { + type: 'stemmer', + language: 'possessive_english', }, }, analyzer: { content: { + tokenizer: 'uax_url_email', filter: %w( + english_possessive_stemmer lowercase + asciifolding cjk_width - sudachi_part_of_speech - sudachi_ja_stop - sudachi_baseform + english_stop + english_stemmer ), - tokenizer: 'sudachi_tokenizer', - type: 'custom', }, }, }