mirror of
https://github.com/meineerde/rackstash.git
synced 2026-02-05 00:23:22 +00:00
Don't copy or alter frozen UTF-8 strings in fields
With this, we optimize the common case where we do have valid UTF-8 strings to begin with. If the given String is already frozen, as is common for e.g. Hash keys, we don't even need to create a new object. With this change, we also always return frozen strings from `Rackstash::Fields::AbstractCollection#utf9_encode`. This avoids an unecessary object copy when inserting it in a Hash and still ensures that values are always frozen anyway.
This commit is contained in:
parent
aad1660135
commit
6eccf6cc37
@ -85,13 +85,20 @@ module Rackstash
|
||||
end
|
||||
end
|
||||
|
||||
# @param str [#to_s]
|
||||
# Encode the given String in UTF-8. If the given `str` is already
|
||||
# correctly encoded and frozen, we just return it unchanged. In all other
|
||||
# cases we return a UTF-8 encoded and frozen copy of the string.
|
||||
#
|
||||
# @param str [String, #to_s]
|
||||
# @return [String]
|
||||
def utf8_encode(str)
|
||||
str.to_s.encode(
|
||||
Encoding::UTF_8,
|
||||
invalid: :replace,
|
||||
undef: :replace
|
||||
)
|
||||
if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
|
||||
str.frozen? ? str : str.dup.freeze
|
||||
else
|
||||
str = str.to_s
|
||||
str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
|
||||
str.freeze
|
||||
end
|
||||
end
|
||||
|
||||
def resolve_value(value, scope: nil)
|
||||
@ -105,8 +112,10 @@ module Rackstash
|
||||
value = resolve_value(value, scope: scope)
|
||||
|
||||
case value
|
||||
when ::String, ::Symbol
|
||||
return utf8_encode(value).freeze
|
||||
when ::String
|
||||
return utf8_encode(value)
|
||||
when ::Symbol
|
||||
return utf8_encode(value.to_s.freeze)
|
||||
when ::Integer, ::Float
|
||||
return value
|
||||
when true, false, nil
|
||||
@ -142,13 +151,13 @@ module Rackstash
|
||||
when ::Date
|
||||
return value.iso8601.encode!(Encoding::UTF_8).freeze
|
||||
when ::Regexp, ::Range, ::URI::Generic, ::Pathname
|
||||
return utf8_encode(value).freeze
|
||||
return utf8_encode(value)
|
||||
when Exception
|
||||
exception = "#{value.message} (#{value.class})"
|
||||
exception << "\n" << value.backtrace.join("\n") if value.backtrace
|
||||
return utf8_encode(exception).freeze
|
||||
return utf8_encode(exception)
|
||||
when ::Proc
|
||||
return utf8_encode(value.inspect).freeze
|
||||
return utf8_encode(value.inspect)
|
||||
when ::BigDecimal
|
||||
# A BigDecimal would be naturally represented as a JSON number. Most
|
||||
# libraries, however, parse non-integer JSON numbers directly as
|
||||
@ -159,7 +168,7 @@ module Rackstash
|
||||
when ::Complex
|
||||
# A complex number can not reliably converted to a float or rational,
|
||||
# thus we always transform it to a String
|
||||
return utf8_encode(value).freeze
|
||||
return utf8_encode(value)
|
||||
end
|
||||
|
||||
# Try to convert the value to a known basic type and recurse
|
||||
@ -176,7 +185,7 @@ module Rackstash
|
||||
return normalize(value, scope: scope, wrap: wrap)
|
||||
end
|
||||
|
||||
utf8_encode(value.inspect).freeze
|
||||
utf8_encode(value.inspect)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@ -141,15 +141,20 @@ describe Rackstash::Fields::AbstractCollection do
|
||||
expect(normalize(strange)).to be_frozen
|
||||
end
|
||||
|
||||
it 'does not alter valid strings' do
|
||||
valid = 'Dönerstraße'.freeze
|
||||
it 'dups and freezes valid strings' do
|
||||
valid = String.new('Dönerstraße')
|
||||
expect(valid).to_not be_frozen
|
||||
|
||||
expect(normalize(valid)).to eql valid
|
||||
# All strings are still copied and frozen, even if they don't need to
|
||||
# be re-encoded.
|
||||
expect(normalize(valid)).to eql(valid)
|
||||
# Not object-equal since the string was dup'ed
|
||||
expect(normalize(valid)).not_to equal valid
|
||||
expect(normalize(valid)).to be_frozen
|
||||
end
|
||||
|
||||
it 'does not alter valid frozen strings' do
|
||||
valid = 'Dönerstraße'.freeze
|
||||
expect(normalize(valid)).to equal(valid)
|
||||
end
|
||||
end
|
||||
|
||||
it 'transforms Symbol to String' do
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user