1
0
mirror of https://github.com/meineerde/rackstash.git synced 2025-12-19 15:01:12 +00:00

Extract utf8_encode helper into Rackstash::Helpers::UTF8 module

This commit is contained in:
Holger Just 2017-07-20 14:07:04 +02:00
parent 2c8d6ac009
commit d73b7ba8e1
6 changed files with 121 additions and 58 deletions

View File

@ -11,9 +11,13 @@ require 'uri'
require 'concurrent' require 'concurrent'
require 'rackstash/helpers'
module Rackstash module Rackstash
module Fields module Fields
class AbstractCollection class AbstractCollection
include Rackstash::Helpers::UTF8
# Equality -- Two collections are equal if they are of exactly the same # Equality -- Two collections are equal if they are of exactly the same
# class and contain the same raw data according to `Object#==`. # class and contain the same raw data according to `Object#==`.
# #
@ -87,22 +91,6 @@ module Rackstash
end end
end end
# Encode the given String in UTF-8. If the given `str` is already
# correctly encoded and frozen, we just return it unchanged. In all other
# cases we return a UTF-8 encoded and frozen copy of the string.
#
# @param str [String, #to_s]
# @return [String]
def utf8_encode(str)
  # Anything that is not a plain String holding valid UTF-8 is stringified
  # and transcoded; unconvertible bytes are replaced instead of raising.
  unless str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
    return str.to_s.encode(Encoding::UTF_8, invalid: :replace, undef: :replace).freeze
  end

  # Already valid: reuse a frozen string as is, otherwise hand out a frozen
  # copy so that the caller's object stays untouched.
  str.frozen? ? str : str.dup.freeze
end
def resolve_value(value, scope: nil) def resolve_value(value, scope: nil)
return value unless value.is_a?(Proc) return value unless value.is_a?(Proc)
scope.nil? ? value.call : scope.instance_exec(&value) scope.nil? ? value.call : scope.instance_exec(&value)
@ -189,7 +177,7 @@ module Rackstash
return normalize(value, scope: scope, wrap: wrap) return normalize(value, scope: scope, wrap: wrap)
end end
utf8_encode(value.inspect) utf8_encode(value.inspect.freeze)
end end
end end
end end

View File

@ -76,7 +76,7 @@ module Rackstash
value.flatten! value.flatten!
value value
else else
utf8_encode(value).strip utf8_encode(value).strip.freeze
end end
end end
end end

14
lib/rackstash/helpers.rb Normal file
View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
# Copyright 2017 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
module Rackstash
# Namespace for utility functions which are used throughout Rackstash.
# The module itself is empty; the helpers are defined in separate files
# which are loaded below.
module Helpers
end
end
# Load the UTF-8 helpers which live under the namespace declared above.
require 'rackstash/helpers/utf8'

View File

@ -0,0 +1,31 @@
# frozen_string_literal: true
# Copyright 2017 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
module Rackstash
  module Helpers
    # Mixin with helper methods for turning arbitrary values into UTF-8
    # encoded Strings.
    module UTF8
      protected

      # Return a frozen, valid UTF-8 representation of `str`.
      #
      # A frozen String which already is valid UTF-8 is returned as is (the
      # very same object). A non-frozen one is duplicated and the copy is
      # frozen, so the argument itself is never modified. Every other object
      # (including instances of String subclasses and Strings in another or
      # an invalid encoding) is converted with `to_s` and transcoded to
      # UTF-8, replacing invalid and undefined byte sequences instead of
      # raising an error.
      #
      # @param str [String, #to_s] the value to encode
      # @return [String] a frozen String in UTF-8 encoding
      def utf8_encode(str)
        valid_utf8 = str.instance_of?(String) &&
                     str.encoding == Encoding::UTF_8 &&
                     str.valid_encoding?

        unless valid_utf8
          converted = str.to_s.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
          return converted.freeze
        end

        # Nothing to convert: avoid an allocation if the String is frozen.
        return str if str.frozen?

        str.dup.freeze
      end
    end
  end
end

View File

@ -116,47 +116,14 @@ describe Rackstash::Fields::AbstractCollection do
end end
describe '#normalize' do describe '#normalize' do
describe 'with String' do it 'encodes Strings to UTF-8' do
it 'transforms encoding to UTF-8' do utf8_str = 'Dönerstraße'
utf8_str = 'Dönerstraße' latin_str = utf8_str.encode(Encoding::ISO8859_9)
latin_str = utf8_str.encode(Encoding::ISO8859_9) expect(latin_str.encoding).to eql Encoding::ISO8859_9
expect(latin_str.encoding).to eql Encoding::ISO8859_9
expect(normalize(latin_str)).to eql utf8_str expect(normalize(latin_str)).to eql utf8_str
expect(normalize(latin_str).encoding).to eql Encoding::UTF_8 expect(normalize(latin_str).encoding).to eql Encoding::UTF_8
expect(normalize(latin_str)).to be_frozen expect(normalize(latin_str)).to be_frozen
end
it 'replaces invalid characters in correctly encoded strings' do
binary = Digest::SHA256.digest('string')
expect(normalize(binary)).to include '<27>'
expect(normalize(binary).encoding).to eql Encoding::UTF_8
expect(normalize(binary)).to be_frozen
end
it 'replaces invalid characters in incorrectly encoded strings' do
strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)
expect(normalize(strange)).to include '<27>'
expect(normalize(strange).encoding).to eql Encoding::UTF_8
expect(normalize(strange)).to be_frozen
end
it 'dups and freezes valid strings' do
valid = String.new('Dönerstraße')
expect(valid).to_not be_frozen
expect(normalize(valid)).to eql(valid)
# Not object-equal since the string was dup'ed
expect(normalize(valid)).not_to equal valid
expect(normalize(valid)).to be_frozen
end
it 'does not alter valid frozen strings' do
valid = 'Dönerstraße'.freeze
expect(normalize(valid)).to equal(valid)
end
end end
it 'transforms Symbol to String' do it 'transforms Symbol to String' do

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
# Copyright 2017 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'spec_helper'
require 'rackstash/helpers/utf8'
describe Rackstash::Helpers::UTF8 do
  it 'only defines protected methods' do
    expect(described_class.public_instance_methods(false)).to be_empty
  end

  describe '#utf8_encode' do
    # The helper is protected, so it is invoked with `send` on a throw-away
    # object which was extended with the module under test.
    def utf8_encode(*args)
      Object.new.extend(described_class).send(:utf8_encode, *args)
    end

    it 'transforms encoding to UTF-8' do
      utf8_str = 'Dönerstraße'
      latin_str = utf8_str.encode(Encoding::ISO8859_9)
      expect(latin_str.encoding).to eql Encoding::ISO8859_9

      expect(utf8_encode(latin_str)).to eql utf8_str
      expect(utf8_encode(latin_str).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(latin_str)).to be_frozen
    end

    it 'replaces invalid characters in correctly encoded strings' do
      # A raw digest is a binary (ASCII-8BIT) String. Its high bytes have no
      # UTF-8 equivalent and are expected to become U+FFFD (REPLACEMENT
      # CHARACTER), written as an escape to survive re-encoding of this file.
      binary = Digest::SHA256.digest('string')

      expect(utf8_encode(binary)).to include "\uFFFD"
      expect(utf8_encode(binary).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(binary)).to be_frozen
    end

    it 'replaces invalid characters in incorrectly encoded strings' do
      # The same bytes, but mislabeled as UTF-8, form invalid byte sequences.
      strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)

      expect(utf8_encode(strange)).to include "\uFFFD"
      expect(utf8_encode(strange).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(strange)).to be_frozen
    end

    it 'dups and freezes valid strings' do
      # String.new yields an unfrozen String despite frozen_string_literal.
      valid = String.new('Dönerstraße')
      expect(valid).to_not be_frozen

      expect(utf8_encode(valid)).to eql(valid)
      # Not object-equal since the string was dup'ed
      expect(utf8_encode(valid)).not_to equal valid
      expect(utf8_encode(valid)).to be_frozen
    end

    it 'does not alter valid frozen strings' do
      valid = 'Dönerstraße'.freeze
      expect(utf8_encode(valid)).to equal(valid)
    end
  end
end