From 6eccf6cc379292aaa43b82416ba887d55ac16341 Mon Sep 17 00:00:00 2001 From: Holger Just Date: Fri, 16 Jun 2017 18:37:25 +0200 Subject: [PATCH] Don't copy or alter frozen UTF-8 strings in fields With this, we optimize the common case where we do have valid UTF-8 strings to begin with. If the given String is already frozen, as is common for e.g. Hash keys, we don't even need to create a new object. With this change, we also always return frozen strings from `Rackstash::Fields::AbstractCollection#utf9_encode`. This avoids an unecessary object copy when inserting it in a Hash and still ensures that values are always frozen anyway. --- lib/rackstash/fields/abstract_collection.rb | 35 ++++++++++++------- .../fields/abstract_collection_spec.rb | 15 +++++--- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/lib/rackstash/fields/abstract_collection.rb b/lib/rackstash/fields/abstract_collection.rb index 66d2d3e..bcf7316 100644 --- a/lib/rackstash/fields/abstract_collection.rb +++ b/lib/rackstash/fields/abstract_collection.rb @@ -85,13 +85,20 @@ module Rackstash end end - # @param str [#to_s] + # Encode the given String in UTF-8. If the given `str` is already + # correctly encoded and frozen, we just return it unchanged. In all other + # cases we return a UTF-8 encoded and frozen copy of the string. + # + # @param str [String, #to_s] + # @return [String] def utf8_encode(str) - str.to_s.encode( - Encoding::UTF_8, - invalid: :replace, - undef: :replace - ) + if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding? + str.frozen? ? str : str.dup.freeze + else + str = str.to_s + str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) + str.freeze + end end def resolve_value(value, scope: nil) @@ -105,8 +112,10 @@ module Rackstash value = resolve_value(value, scope: scope) case value - when ::String, ::Symbol - return utf8_encode(value).freeze + when ::String + return utf8_encode(value) + when ::Symbol + return utf8_encode(value.to_s.freeze) when ::Integer, ::Float return value when true, false, nil @@ -142,13 +151,13 @@ module Rackstash when ::Date return value.iso8601.encode!(Encoding::UTF_8).freeze when ::Regexp, ::Range, ::URI::Generic, ::Pathname - return utf8_encode(value).freeze + return utf8_encode(value) when Exception exception = "#{value.message} (#{value.class})" exception << "\n" << value.backtrace.join("\n") if value.backtrace - return utf8_encode(exception).freeze + return utf8_encode(exception) when ::Proc - return utf8_encode(value.inspect).freeze + return utf8_encode(value.inspect) when ::BigDecimal # A BigDecimal would be naturally represented as a JSON number. Most # libraries, however, parse non-integer JSON numbers directly as @@ -159,7 +168,7 @@ module Rackstash when ::Complex # A complex number can not reliably converted to a float or rational, # thus we always transform it to a String - return utf8_encode(value).freeze + return utf8_encode(value) end # Try to convert the value to a known basic type and recurse @@ -176,7 +185,7 @@ module Rackstash return normalize(value, scope: scope, wrap: wrap) end - utf8_encode(value.inspect).freeze + utf8_encode(value.inspect) end end end diff --git a/spec/rackstash/fields/abstract_collection_spec.rb b/spec/rackstash/fields/abstract_collection_spec.rb index c3cb104..5ce1d04 100644 --- a/spec/rackstash/fields/abstract_collection_spec.rb +++ b/spec/rackstash/fields/abstract_collection_spec.rb @@ -141,15 +141,20 @@ describe Rackstash::Fields::AbstractCollection do expect(normalize(strange)).to be_frozen end - it 'does not alter valid strings' do - valid = 'Dönerstraße'.freeze + it 'dups and freezes valid strings' do + valid = String.new('Dönerstraße') + expect(valid).to_not be_frozen - expect(normalize(valid)).to eql valid - # All strings are still copied and frozen, even if they don't need to - # be re-encoded. + expect(normalize(valid)).to eql(valid) + # Not object-equal since the string was dup'ed expect(normalize(valid)).not_to equal valid expect(normalize(valid)).to be_frozen end + + it 'does not alter valid frozen strings' do + valid = 'Dönerstraße'.freeze + expect(normalize(valid)).to equal(valid) + end end it 'transforms Symbol to String' do