mirror of
https://github.com/meineerde/rackstash.git
synced 2025-10-17 14:01:01 +00:00
Extract utf8_encode helper into Rackstash::Helpers::UTF8 module
This commit is contained in:
parent
2c8d6ac009
commit
d73b7ba8e1
@ -11,9 +11,13 @@ require 'uri'
|
|||||||
|
|
||||||
require 'concurrent'
|
require 'concurrent'
|
||||||
|
|
||||||
|
require 'rackstash/helpers'
|
||||||
|
|
||||||
module Rackstash
|
module Rackstash
|
||||||
module Fields
|
module Fields
|
||||||
class AbstractCollection
|
class AbstractCollection
|
||||||
|
include Rackstash::Helpers::UTF8
|
||||||
|
|
||||||
# Equality -- Two collections are equal if they are of exactly the same
|
# Equality -- Two collections are equal if they are of exactly the same
|
||||||
# class and contain the same raw data according to `Object#==`.
|
# class and contain the same raw data according to `Object#==`.
|
||||||
#
|
#
|
||||||
@ -87,22 +91,6 @@ module Rackstash
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Encode the given String in UTF-8. If the given `str` is already
|
|
||||||
# correctly encoded and frozen, we just return it unchanged. In all other
|
|
||||||
# cases we return a UTF-8 encoded and frozen copy of the string.
|
|
||||||
#
|
|
||||||
# @param str [String, #to_s]
|
|
||||||
# @return [String]
|
|
||||||
def utf8_encode(str)
|
|
||||||
if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
|
|
||||||
str.frozen? ? str : str.dup.freeze
|
|
||||||
else
|
|
||||||
str = str.to_s
|
|
||||||
str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
|
|
||||||
str.freeze
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def resolve_value(value, scope: nil)
|
def resolve_value(value, scope: nil)
|
||||||
return value unless value.is_a?(Proc)
|
return value unless value.is_a?(Proc)
|
||||||
scope.nil? ? value.call : scope.instance_exec(&value)
|
scope.nil? ? value.call : scope.instance_exec(&value)
|
||||||
@ -189,7 +177,7 @@ module Rackstash
|
|||||||
return normalize(value, scope: scope, wrap: wrap)
|
return normalize(value, scope: scope, wrap: wrap)
|
||||||
end
|
end
|
||||||
|
|
||||||
utf8_encode(value.inspect)
|
utf8_encode(value.inspect.freeze)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@ -76,7 +76,7 @@ module Rackstash
|
|||||||
value.flatten!
|
value.flatten!
|
||||||
value
|
value
|
||||||
else
|
else
|
||||||
utf8_encode(value).strip
|
utf8_encode(value).strip.freeze
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
14
lib/rackstash/helpers.rb
Normal file
14
lib/rackstash/helpers.rb
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Copyright 2017 Holger Just
|
||||||
|
#
|
||||||
|
# This software may be modified and distributed under the terms
|
||||||
|
# of the MIT license. See the LICENSE.txt file for details.
|
||||||
|
|
||||||
|
module Rackstash
|
||||||
|
# Some utility functions which are used throughout Rackstash.
|
||||||
|
module Helpers
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
require 'rackstash/helpers/utf8'
|
||||||
31
lib/rackstash/helpers/utf8.rb
Normal file
31
lib/rackstash/helpers/utf8.rb
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Copyright 2017 Holger Just
|
||||||
|
#
|
||||||
|
# This software may be modified and distributed under the terms
|
||||||
|
# of the MIT license. See the LICENSE.txt file for details.
|
||||||
|
|
||||||
|
module Rackstash
|
||||||
|
module Helpers
|
||||||
|
# Provide helper functions to help with UTF8 handling of Strings.
|
||||||
|
module UTF8
|
||||||
|
protected
|
||||||
|
|
||||||
|
# Encode the given String in UTF-8. If the given `str` is already
|
||||||
|
# correctly encoded and frozen, we just return it unchanged. In all other
|
||||||
|
# cases we return a UTF-8 encoded and frozen copy of the string.
|
||||||
|
#
|
||||||
|
# @param str [String, #to_s]
|
||||||
|
# @return [String]
|
||||||
|
def utf8_encode(str)
|
||||||
|
if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
|
||||||
|
str.frozen? ? str : str.dup.freeze
|
||||||
|
else
|
||||||
|
str = str.to_s
|
||||||
|
str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
|
||||||
|
str.freeze
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
@ -116,8 +116,7 @@ describe Rackstash::Fields::AbstractCollection do
|
|||||||
end
|
end
|
||||||
|
|
||||||
describe '#normalize' do
|
describe '#normalize' do
|
||||||
describe 'with String' do
|
it 'encodes Strings to UTF-8' do
|
||||||
it 'transforms encoding to UTF-8' do
|
|
||||||
utf8_str = 'Dönerstraße'
|
utf8_str = 'Dönerstraße'
|
||||||
latin_str = utf8_str.encode(Encoding::ISO8859_9)
|
latin_str = utf8_str.encode(Encoding::ISO8859_9)
|
||||||
expect(latin_str.encoding).to eql Encoding::ISO8859_9
|
expect(latin_str.encoding).to eql Encoding::ISO8859_9
|
||||||
@ -127,38 +126,6 @@ describe Rackstash::Fields::AbstractCollection do
|
|||||||
expect(normalize(latin_str)).to be_frozen
|
expect(normalize(latin_str)).to be_frozen
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'replaces invalid characters in correctly encoded strings' do
|
|
||||||
binary = Digest::SHA256.digest('string')
|
|
||||||
|
|
||||||
expect(normalize(binary)).to include '<27>'
|
|
||||||
expect(normalize(binary).encoding).to eql Encoding::UTF_8
|
|
||||||
expect(normalize(binary)).to be_frozen
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'replaces invalid characters in incorrectly encoded strings' do
|
|
||||||
strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)
|
|
||||||
|
|
||||||
expect(normalize(strange)).to include '<27>'
|
|
||||||
expect(normalize(strange).encoding).to eql Encoding::UTF_8
|
|
||||||
expect(normalize(strange)).to be_frozen
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'dups and freezes valid strings' do
|
|
||||||
valid = String.new('Dönerstraße')
|
|
||||||
expect(valid).to_not be_frozen
|
|
||||||
|
|
||||||
expect(normalize(valid)).to eql(valid)
|
|
||||||
# Not object-equal since the string was dup'ed
|
|
||||||
expect(normalize(valid)).not_to equal valid
|
|
||||||
expect(normalize(valid)).to be_frozen
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'does not alter valid frozen strings' do
|
|
||||||
valid = 'Dönerstraße'.freeze
|
|
||||||
expect(normalize(valid)).to equal(valid)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'transforms Symbol to String' do
|
it 'transforms Symbol to String' do
|
||||||
symbol = :foo
|
symbol = :foo
|
||||||
|
|
||||||
|
|||||||
63
spec/rackstash/helpers/utf8_spec.rb
Normal file
63
spec/rackstash/helpers/utf8_spec.rb
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# Copyright 2017 Holger Just
|
||||||
|
#
|
||||||
|
# This software may be modified and distributed under the terms
|
||||||
|
# of the MIT license. See the LICENSE.txt file for details.
|
||||||
|
|
||||||
|
require 'spec_helper'
|
||||||
|
|
||||||
|
require 'rackstash/helpers/utf8'
|
||||||
|
|
||||||
|
describe Rackstash::Helpers::UTF8 do
|
||||||
|
it 'only defines protected methods' do
|
||||||
|
expect(described_class.public_instance_methods(false)).to be_empty
|
||||||
|
end
|
||||||
|
|
||||||
|
describe '#utf8_encode' do
|
||||||
|
def utf8_encode(*args)
|
||||||
|
Object.new.extend(described_class).send(:utf8_encode, *args)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'transforms encoding to UTF-8' do
|
||||||
|
utf8_str = 'Dönerstraße'
|
||||||
|
latin_str = utf8_str.encode(Encoding::ISO8859_9)
|
||||||
|
expect(latin_str.encoding).to eql Encoding::ISO8859_9
|
||||||
|
|
||||||
|
expect(utf8_encode(latin_str)).to eql utf8_str
|
||||||
|
expect(utf8_encode(latin_str).encoding).to eql Encoding::UTF_8
|
||||||
|
expect(utf8_encode(latin_str)).to be_frozen
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'replaces invalid characters in correctly encoded strings' do
|
||||||
|
binary = Digest::SHA256.digest('string')
|
||||||
|
|
||||||
|
expect(utf8_encode(binary)).to include '<27>'
|
||||||
|
expect(utf8_encode(binary).encoding).to eql Encoding::UTF_8
|
||||||
|
expect(utf8_encode(binary)).to be_frozen
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'replaces invalid characters in incorrectly encoded strings' do
|
||||||
|
strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)
|
||||||
|
|
||||||
|
expect(utf8_encode(strange)).to include '<27>'
|
||||||
|
expect(utf8_encode(strange).encoding).to eql Encoding::UTF_8
|
||||||
|
expect(utf8_encode(strange)).to be_frozen
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'dups and freezes valid strings' do
|
||||||
|
valid = String.new('Dönerstraße')
|
||||||
|
expect(valid).to_not be_frozen
|
||||||
|
|
||||||
|
expect(utf8_encode(valid)).to eql(valid)
|
||||||
|
# Not object-equal since the string was dup'ed
|
||||||
|
expect(utf8_encode(valid)).not_to equal valid
|
||||||
|
expect(utf8_encode(valid)).to be_frozen
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'does not alter valid frozen strings' do
|
||||||
|
valid = 'Dönerstraße'.freeze
|
||||||
|
expect(utf8_encode(valid)).to equal(valid)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
Loading…
x
Reference in New Issue
Block a user