From b9379e7a26bce4ba76108da1b6f13ea22e236632 Mon Sep 17 00:00:00 2001 From: Go MAEDA Date: Thu, 3 Nov 2022 12:43:28 +0000 Subject: [PATCH] Allow using ideographic space (U+3000) as a separator for search terms (#37878). Patch by Go MAEDA. git-svn-id: https://svn.redmine.org/redmine/trunk@21952 e93f8b46-1217-0410-a6f0-8f06a7374b81 --- lib/redmine/search.rb | 2 +- test/unit/lib/redmine/search_test.rb | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/redmine/search.rb b/lib/redmine/search.rb index bf8b7633a..3c1e0700c 100644 --- a/lib/redmine/search.rb +++ b/lib/redmine/search.rb @@ -135,7 +135,7 @@ module Redmine def tokens # extract tokens from the question # eg. hello "bye bye" => ["hello", "bye bye"] - tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')} + tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')} # tokens must be at least 2 characters long # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character # no more than 5 tokens to search for diff --git a/test/unit/lib/redmine/search_test.rb b/test/unit/lib/redmine/search_test.rb index e2944b4ac..219f40cfd 100644 --- a/test/unit/lib/redmine/search_test.rb +++ b/test/unit/lib/redmine/search_test.rb @@ -24,4 +24,10 @@ class Redmine::Search::Tokenize < ActiveSupport::TestCase value = "hello \"bye bye\"" assert_equal ["hello", "bye bye"], Redmine::Search::Tokenizer.new(value).tokens end + + def test_tokenize_should_consider_ideographic_space_as_separator + # U+3000 is an ideographic space (" ") + value = "全角\u3000スペース" + assert_equal %w[全角 スペース], Redmine::Search::Tokenizer.new(value).tokens + end end