1
0
mirror of https://github.com/meineerde/rackstash.git synced 2026-02-05 00:23:22 +00:00

Don't copy or alter frozen UTF-8 strings in fields

With this, we optimize the common case where we do have valid UTF-8
strings to begin with. If the given String is already frozen, as is
common for e.g. Hash keys, we don't even need to create a new object.

With this change, we also always return frozen strings from
`Rackstash::Fields::AbstractCollection#utf9_encode`. This avoids an
unecessary object copy when inserting it in a Hash and still ensures
that values are always frozen anyway.
This commit is contained in:
Holger Just 2017-06-16 18:37:25 +02:00
parent aad1660135
commit 6eccf6cc37
2 changed files with 32 additions and 18 deletions

View File

@ -85,13 +85,20 @@ module Rackstash
end
end
# @param str [#to_s]
# Encode the given String in UTF-8. If the given `str` is already
# correctly encoded and frozen, we just return it unchanged. In all other
# cases we return a UTF-8 encoded and frozen copy of the string.
#
# @param str [String, #to_s]
# @return [String]
def utf8_encode(str)
str.to_s.encode(
Encoding::UTF_8,
invalid: :replace,
undef: :replace
)
if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
str.frozen? ? str : str.dup.freeze
else
str = str.to_s
str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
str.freeze
end
end
def resolve_value(value, scope: nil)
@ -105,8 +112,10 @@ module Rackstash
value = resolve_value(value, scope: scope)
case value
when ::String, ::Symbol
return utf8_encode(value).freeze
when ::String
return utf8_encode(value)
when ::Symbol
return utf8_encode(value.to_s.freeze)
when ::Integer, ::Float
return value
when true, false, nil
@ -142,13 +151,13 @@ module Rackstash
when ::Date
return value.iso8601.encode!(Encoding::UTF_8).freeze
when ::Regexp, ::Range, ::URI::Generic, ::Pathname
return utf8_encode(value).freeze
return utf8_encode(value)
when Exception
exception = "#{value.message} (#{value.class})"
exception << "\n" << value.backtrace.join("\n") if value.backtrace
return utf8_encode(exception).freeze
return utf8_encode(exception)
when ::Proc
return utf8_encode(value.inspect).freeze
return utf8_encode(value.inspect)
when ::BigDecimal
# A BigDecimal would be naturally represented as a JSON number. Most
# libraries, however, parse non-integer JSON numbers directly as
@ -159,7 +168,7 @@ module Rackstash
when ::Complex
# A complex number can not reliably converted to a float or rational,
# thus we always transform it to a String
return utf8_encode(value).freeze
return utf8_encode(value)
end
# Try to convert the value to a known basic type and recurse
@ -176,7 +185,7 @@ module Rackstash
return normalize(value, scope: scope, wrap: wrap)
end
utf8_encode(value.inspect).freeze
utf8_encode(value.inspect)
end
end
end

View File

@ -141,15 +141,20 @@ describe Rackstash::Fields::AbstractCollection do
expect(normalize(strange)).to be_frozen
end
it 'does not alter valid strings' do
valid = 'Dönerstraße'.freeze
it 'dups and freezes valid strings' do
valid = String.new('Dönerstraße')
expect(valid).to_not be_frozen
expect(normalize(valid)).to eql valid
# All strings are still copied and frozen, even if they don't need to
# be re-encoded.
expect(normalize(valid)).to eql(valid)
# Not object-equal since the string was dup'ed
expect(normalize(valid)).not_to equal valid
expect(normalize(valid)).to be_frozen
end
it 'does not alter valid frozen strings' do
valid = 'Dönerstraße'.freeze
expect(normalize(valid)).to equal(valid)
end
end
it 'transforms Symbol to String' do