Revise uses of encoding APIs.

When this code was originally implemented, we still supported Ruby 1.8,
which necessitated checking for encoding methods and using a regex to
validate UTF-8. These checks are now gone.

We previously tagged many strings as binary when not strictly necessary,
either because we were just going to iterate their bytes or because we
were going to hand them off to the caller, which should just write them
directly to a socket. That unnecessary tagging has been removed. Strings
used as buffers to accumulate streaming input are still tagged as binary
to avoid encoding collision/conversion.

The places where we do need to tag as UTF-8 (i.e. just before emitting
to the application) remain, but now copy the string first if it is
frozen instead of mutating it in place. This allows us to work with
frozen strings.

Finally, strings passed in via the Driver#text method should be
*transcoded* to UTF-8 if necessary, not merely tagged. The Ruby
String#encode method produces a new string so this should also be safe
with frozen strings.
This commit is contained in:
James Coglan
2016-05-19 21:02:58 +01:00
parent 115d82bebb
commit 9ce857b3d4
12 changed files with 29 additions and 46 deletions
+13 -20
View File
@@ -33,13 +33,12 @@ module WebSocket
end
end
unless String.instance_methods.include?(:force_encoding)
require root + '/utf8_match'
end
MAX_LENGTH = 0x3ffffff
STATES = [:connecting, :open, :closing, :closed]
BINARY = 'ASCII-8BIT'
UNICODE = 'UTF-8'
ConnectEvent = Struct.new(nil)
OpenEvent = Struct.new(nil)
MessageEvent = Struct.new(:data)
@@ -94,13 +93,14 @@ module WebSocket
return false unless @ready_state == 0
response = handshake_response
return false unless response
@socket.write(Driver.encode(response, :binary))
@socket.write(response)
open unless @stage == -1
true
end
def text(message)
frame(message)
message = message.encode(UNICODE) unless message.encoding.name == UNICODE
frame(message, :text)
end
def binary(message)
@@ -159,14 +159,15 @@ module WebSocket
case string
when Array then
string = string.pack('C*')
encoding ||= :binary
encoding ||= BINARY
when String then
encoding ||= :utf8
encoding ||= UNICODE
end
encodings = {:utf8 => 'UTF-8', :binary => 'ASCII-8BIT'}
string.force_encoding(encodings[encoding]) if string.respond_to?(:force_encoding)
return nil if encoding == :utf8 and not valid_utf8?(string)
string
unless string.encoding.name == encoding
string = string.dup if string.frozen?
string.force_encoding(encoding)
end
string.valid_encoding? ? string : nil
end
def self.validate_options(options, valid_keys)
@@ -177,14 +178,6 @@ module WebSocket
end
end
def self.valid_utf8?(string)
if defined?(UTF8_MATCH)
UTF8_MATCH =~ string ? true : false
else
string.valid_encoding?
end
end
def self.websocket?(env)
connection = env['HTTP_CONNECTION'] || ''
upgrade = env['HTTP_UPGRADE'] || ''
+1 -1
View File
@@ -53,7 +53,7 @@ module WebSocket
def start
return false unless @ready_state == -1
@socket.write(Driver.encode(handshake_request, :binary))
@socket.write(handshake_request)
@ready_state = 0
true
end
+1 -1
View File
@@ -56,7 +56,7 @@ module WebSocket
when 2 then
if octet == 0xFF
@stage = 0
emit(:message, MessageEvent.new(Driver.encode(@buffer, :utf8)))
emit(:message, MessageEvent.new(Driver.encode(@buffer, UNICODE)))
else
if @length
@skipped += 1
+2 -2
View File
@@ -8,7 +8,7 @@ module WebSocket
super
input = @socket.env['rack.input']
@stage = -1
@body = Driver.encode(input ? input.read : '', :binary)
@body = (input ? input.read : String.new('')).force_encoding(BINARY)
@headers.clear
@headers['Upgrade'] = 'WebSocket'
@@ -70,7 +70,7 @@ module WebSocket
def send_handshake_body
return unless signature = handshake_signature
@socket.write(Driver.encode(signature, :binary))
@socket.write(signature)
@stage = 0
open
parse(@body[BODY_SIZE..-1]) if @body.bytesize > BODY_SIZE
+3 -7
View File
@@ -129,10 +129,6 @@ module WebSocket
end
end
def text(message)
frame(message, :text)
end
def binary(message)
frame(message, :binary)
end
@@ -356,7 +352,7 @@ module WebSocket
when OPCODES[:close] then
code = (bytesize >= 2) ? payload.unpack(PACK_FORMATS[2]).first : nil
reason = (bytesize > 2) ? Driver.encode(bytes[2..-1] || [], :utf8) : nil
reason = (bytesize > 2) ? Driver.encode(bytes[2..-1] || [], UNICODE) : nil
unless (bytesize == 0) or
(code && code >= MIN_RESERVED_ERROR && code <= MAX_RESERVED_ERROR) or
@@ -374,7 +370,7 @@ module WebSocket
frame(payload, :pong)
when OPCODES[:pong] then
message = Driver.encode(payload, :utf8)
message = Driver.encode(payload, UNICODE)
callback = @ping_callbacks[message]
@ping_callbacks.delete(message)
callback.call if callback
@@ -391,7 +387,7 @@ module WebSocket
case message.opcode
when OPCODES[:text] then
payload = Driver.encode(payload, :utf8)
payload = Driver.encode(payload, UNICODE)
when OPCODES[:binary]
payload = payload.bytes.to_a
end
+1 -1
View File
@@ -14,7 +14,7 @@ module WebSocket
@rsv2 = false
@rsv3 = false
@opcode = nil
@data = Driver.encode('', :binary)
@data = String.new('').force_encoding(BINARY)
end
def <<(frame)
+1 -1
View File
@@ -44,7 +44,7 @@ module WebSocket
start = "CONNECT #{@origin.host}:#{port} HTTP/1.1"
headers = [start, @headers.to_s, '']
@socket.write(Driver.encode(headers.join("\r\n"), :binary))
@socket.write(headers.join("\r\n"))
true
end
+1 -1
View File
@@ -58,7 +58,7 @@ module WebSocket
end
def write(buffer)
@socket.write(Driver.encode(buffer, :binary))
@socket.write(buffer)
end
private
+3 -3
View File
@@ -6,13 +6,13 @@ module WebSocket
MINIMUM_AUTOMATIC_PRUNE_OFFSET = 128
def initialize
@buffer = Driver.encode('', :binary)
@buffer = String.new('').force_encoding(BINARY)
@offset = 0
end
def put(chunk)
return unless chunk and chunk.bytesize > 0
@buffer << Driver.encode(chunk, :binary)
@buffer << chunk.force_encoding(BINARY)
end
# Read bytes from the data:
@@ -42,7 +42,7 @@ module WebSocket
buffer_size = @buffer.bytesize
if @offset > buffer_size
@buffer = Driver.encode('', :binary)
@buffer = String.new('').force_encoding(BINARY)
else
@buffer = @buffer.byteslice(@offset, buffer_size - @offset)
end
-6
View File
@@ -1,6 +0,0 @@
module WebSocket
class Driver
# http://www.w3.org/International/questions/qa-forms-utf-8.en.php
UTF8_MATCH = /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/
end
end
+1 -1
View File
@@ -90,7 +90,7 @@ describe WebSocket::Driver::Draft75 do
"WebSocket-Origin: http://www.example.com\r\n" +
"WebSocket-Location: ws://www.example.com/socket\r\n" +
"\r\n")
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", :binary)
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", WebSocket::Driver::BINARY)
driver.frame("Hi")
driver.start
+2 -2
View File
@@ -137,7 +137,7 @@ describe WebSocket::Driver::Draft76 do
"Sec-WebSocket-Location: ws://www.example.com/socket\r\n" +
"\r\n")
expect(socket).to receive(:write).with(response)
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", :binary)
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", WebSocket::Driver::BINARY)
driver.frame("Hi")
driver.start
@@ -191,7 +191,7 @@ describe WebSocket::Driver::Draft76 do
it "sends any frames queued before the handshake was complete" do
expect(socket).to receive(:write).with(response)
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00hello\xFF", :binary)
expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00hello\xFF", WebSocket::Driver::BINARY)
driver.frame("hello")
driver.parse(body)
expect(@bytes).to eq [0, 104, 101, 108, 108, 111, 255]