mirror of
https://github.com/meineerde/rackstash.git
synced 2025-10-17 14:01:01 +00:00
Extract utf8_encode helper into Rackstash::Helpers::UTF8 module
This commit is contained in:
parent
2c8d6ac009
commit
d73b7ba8e1
@ -11,9 +11,13 @@ require 'uri'
|
||||
|
||||
require 'concurrent'
|
||||
|
||||
require 'rackstash/helpers'
|
||||
|
||||
module Rackstash
|
||||
module Fields
|
||||
class AbstractCollection
|
||||
include Rackstash::Helpers::UTF8
|
||||
|
||||
# Equality -- Two collections are equal if they are of exactly the same
|
||||
# class and contain the same raw data according to `Object#==`.
|
||||
#
|
||||
@ -87,22 +91,6 @@ module Rackstash
|
||||
end
|
||||
end
|
||||
|
||||
# Encode the given String in UTF-8. If the given `str` is already
|
||||
# correctly encoded and frozen, we just return it unchanged. In all other
|
||||
# cases we return a UTF-8 encoded and frozen copy of the string.
|
||||
#
|
||||
# @param str [String, #to_s]
|
||||
# @return [String]
|
||||
def utf8_encode(str)
|
||||
if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
|
||||
str.frozen? ? str : str.dup.freeze
|
||||
else
|
||||
str = str.to_s
|
||||
str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
|
||||
str.freeze
|
||||
end
|
||||
end
|
||||
|
||||
def resolve_value(value, scope: nil)
|
||||
return value unless value.is_a?(Proc)
|
||||
scope.nil? ? value.call : scope.instance_exec(&value)
|
||||
@ -189,7 +177,7 @@ module Rackstash
|
||||
return normalize(value, scope: scope, wrap: wrap)
|
||||
end
|
||||
|
||||
utf8_encode(value.inspect)
|
||||
utf8_encode(value.inspect.freeze)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@ -76,7 +76,7 @@ module Rackstash
|
||||
value.flatten!
|
||||
value
|
||||
else
|
||||
utf8_encode(value).strip
|
||||
utf8_encode(value).strip.freeze
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
14
lib/rackstash/helpers.rb
Normal file
14
lib/rackstash/helpers.rb
Normal file
@ -0,0 +1,14 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Copyright 2017 Holger Just
|
||||
#
|
||||
# This software may be modified and distributed under the terms
|
||||
# of the MIT license. See the LICENSE.txt file for details.
|
||||
|
||||
module Rackstash
|
||||
# Namespace for utility functions which are used throughout Rackstash.
|
||||
module Helpers
|
||||
end
|
||||
end
|
||||
|
||||
require 'rackstash/helpers/utf8'
|
||||
31
lib/rackstash/helpers/utf8.rb
Normal file
31
lib/rackstash/helpers/utf8.rb
Normal file
@ -0,0 +1,31 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Copyright 2017 Holger Just
|
||||
#
|
||||
# This software may be modified and distributed under the terms
|
||||
# of the MIT license. See the LICENSE.txt file for details.
|
||||
|
||||
module Rackstash
  module Helpers
    # Provide helper functions to help with UTF8 handling of Strings.
    #
    # All helpers are defined as protected instance methods, so including
    # this module does not add anything to the public interface of the
    # including class.
    module UTF8
      protected

      # Encode the given String in UTF-8. If the given `str` is already
      # correctly encoded and frozen, we just return it unchanged. In all other
      # cases we return a UTF-8 encoded and frozen copy of the string.
      #
      # Byte sequences which are invalid in the source encoding and characters
      # which are undefined in UTF-8 are replaced instead of raising an
      # encoding error.
      #
      # @param str [String, #to_s] the value to encode; anything which is not
      #   exactly a `String` is converted with `to_s` first
      # @return [String] a frozen UTF-8 String
      def utf8_encode(str)
        if str.instance_of?(String) && str.encoding == Encoding::UTF_8 && str.valid_encoding?
          # Already valid UTF-8: reuse a frozen String as is, otherwise freeze
          # a duplicate so that the caller's object is left untouched.
          str.frozen? ? str : str.dup.freeze
        else
          str = str.to_s
          str = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
          str.freeze
        end
      end
    end
  end
end
|
||||
@ -116,47 +116,14 @@ describe Rackstash::Fields::AbstractCollection do
|
||||
end
|
||||
|
||||
describe '#normalize' do
|
||||
describe 'with String' do
|
||||
it 'transforms encoding to UTF-8' do
|
||||
utf8_str = 'Dönerstraße'
|
||||
latin_str = utf8_str.encode(Encoding::ISO8859_9)
|
||||
expect(latin_str.encoding).to eql Encoding::ISO8859_9
|
||||
it 'encodes Strings to UTF-8' do
|
||||
utf8_str = 'Dönerstraße'
|
||||
latin_str = utf8_str.encode(Encoding::ISO8859_9)
|
||||
expect(latin_str.encoding).to eql Encoding::ISO8859_9
|
||||
|
||||
expect(normalize(latin_str)).to eql utf8_str
|
||||
expect(normalize(latin_str).encoding).to eql Encoding::UTF_8
|
||||
expect(normalize(latin_str)).to be_frozen
|
||||
end
|
||||
|
||||
it 'replaces invalid characters in correctly encoded strings' do
|
||||
binary = Digest::SHA256.digest('string')
|
||||
|
||||
expect(normalize(binary)).to include '<27>'
|
||||
expect(normalize(binary).encoding).to eql Encoding::UTF_8
|
||||
expect(normalize(binary)).to be_frozen
|
||||
end
|
||||
|
||||
it 'replaces invalid characters in incorrectly encoded strings' do
|
||||
strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)
|
||||
|
||||
expect(normalize(strange)).to include '<27>'
|
||||
expect(normalize(strange).encoding).to eql Encoding::UTF_8
|
||||
expect(normalize(strange)).to be_frozen
|
||||
end
|
||||
|
||||
it 'dups and freezes valid strings' do
|
||||
valid = String.new('Dönerstraße')
|
||||
expect(valid).to_not be_frozen
|
||||
|
||||
expect(normalize(valid)).to eql(valid)
|
||||
# Not object-equal since the string was dup'ed
|
||||
expect(normalize(valid)).not_to equal valid
|
||||
expect(normalize(valid)).to be_frozen
|
||||
end
|
||||
|
||||
it 'does not alter valid frozen strings' do
|
||||
valid = 'Dönerstraße'.freeze
|
||||
expect(normalize(valid)).to equal(valid)
|
||||
end
|
||||
expect(normalize(latin_str)).to eql utf8_str
|
||||
expect(normalize(latin_str).encoding).to eql Encoding::UTF_8
|
||||
expect(normalize(latin_str)).to be_frozen
|
||||
end
|
||||
|
||||
it 'transforms Symbol to String' do
|
||||
|
||||
63
spec/rackstash/helpers/utf8_spec.rb
Normal file
63
spec/rackstash/helpers/utf8_spec.rb
Normal file
@ -0,0 +1,63 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Copyright 2017 Holger Just
|
||||
#
|
||||
# This software may be modified and distributed under the terms
|
||||
# of the MIT license. See the LICENSE.txt file for details.
|
||||
|
||||
require 'spec_helper'
|
||||
|
||||
require 'rackstash/helpers/utf8'
|
||||
|
||||
describe Rackstash::Helpers::UTF8 do
  it 'only defines protected methods' do
    expect(described_class.public_instance_methods(false)).to be_empty
  end

  describe '#utf8_encode' do
    # The helper is protected, so it is called via `send` on a throw-away
    # object which is extended with the module under test.
    def utf8_encode(*args)
      Object.new.extend(described_class).send(:utf8_encode, *args)
    end

    it 'transforms encoding to UTF-8' do
      utf8_str = 'Dönerstraße'
      latin_str = utf8_str.encode(Encoding::ISO8859_9)
      expect(latin_str.encoding).to eql Encoding::ISO8859_9

      expect(utf8_encode(latin_str)).to eql utf8_str
      expect(utf8_encode(latin_str).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(latin_str)).to be_frozen
    end

    it 'replaces invalid characters in correctly encoded strings' do
      binary = Digest::SHA256.digest('string')

      # "\uFFFD" is the Unicode replacement character which String#encode
      # inserts by default for `invalid: :replace` / `undef: :replace` when
      # the target encoding is UTF-8.
      expect(utf8_encode(binary)).to include "\uFFFD"
      expect(utf8_encode(binary).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(binary)).to be_frozen
    end

    it 'replaces invalid characters in incorrectly encoded strings' do
      strange = Digest::SHA256.digest('string').force_encoding(Encoding::UTF_8)

      expect(utf8_encode(strange)).to include "\uFFFD"
      expect(utf8_encode(strange).encoding).to eql Encoding::UTF_8
      expect(utf8_encode(strange)).to be_frozen
    end

    it 'dups and freezes valid strings' do
      valid = String.new('Dönerstraße')
      expect(valid).not_to be_frozen

      expect(utf8_encode(valid)).to eql(valid)
      # Not object-equal since the string was dup'ed
      expect(utf8_encode(valid)).not_to equal valid
      expect(utf8_encode(valid)).to be_frozen
    end

    it 'does not alter valid frozen strings' do
      valid = 'Dönerstraße'.freeze
      expect(utf8_encode(valid)).to equal(valid)
    end
  end
end
|
||||
Loading…
x
Reference in New Issue
Block a user