struct Char

Overview

A Char represents a Unicode code point. It occupies 32 bits.

It is created by enclosing a UTF-8 character in single quotes.

'a'
'z'
'0'
'_'
'あ'

You can use a backslash to denote some characters:

'\'' # single quote
'\\' # backslash
'\e' # escape
'\f' # form feed
'\n' # newline
'\r' # carriage return
'\t' # tab
'\v' # vertical tab

You can use a backslash followed by at most three digits to denote a code point written in octal:

'\101' # == 'A'
'\123' # == 'S'
'\12'  # == '\n'
'\1'   # code point 1

You can use a backslash followed by a u and four hexadecimal characters to denote a Unicode codepoint written in hexadecimal:

'\u0041' # == 'A'

Or you can use curly braces and specify up to six hexadecimal digits:

'\u{41}' # == 'A'

Included Modules

Defined in:

Constant Summary

MAX = 1114111.unsafe_chr

The maximum character

MAX_CODEPOINT = 1114111

The maximum valid codepoint for a character

ZERO = '\u{0}'

The character representing the end of a C string.

Instance Method Summary

Instance methods inherited from module Comparable(Char)

<(other : T) <, <=(other : T) <=, <=>(other : T) <=>, ==(other : T) ==, >(other : T) >, >=(other : T) >=

Instance methods inherited from struct Value

==(other) ==, dup dup

Instance methods inherited from class Object

!=(other) !=, !~(other) !~, ==(other) ==, ===(other : JSON::Any)
===(other : YAML::Any)
===(other)
===
, =~(other) =~, class class, dup dup, hash hash, inspect(io : IO)
inspect
inspect
, itself itself, not_nil! not_nil!, pretty_inspect(width = 79, newline = "\n", indent = 0) : String pretty_inspect, pretty_print(pp : PrettyPrint) : Nil pretty_print, tap(&block) tap, to_json to_json, to_pretty_json(indent : String = " ")
to_pretty_json(io : IO, indent : String = " ")
to_pretty_json
, to_s
to_s(io : IO)
to_s
, to_yaml(io : IO)
to_yaml
to_yaml
, try(&block) try

Class methods inherited from class Object

==(other : Class) ==, ===(other) ===, cast(other) : self cast, clone clone, dup dup, from_json(string_or_io, root : String) : self
from_json(string_or_io) : self
from_json
, from_yaml(string_or_io) : self from_yaml, hash hash, inspect(io) inspect, name : String name, nilable? nilable?, to_s(io) to_s, |(other : U.class) forall U |

Instance Method Detail

def !=(other : Char) : Bool #

Returns true if self's codepoint is not equal to other's codepoint.


def +(str : String) #

Concatenates this char and string.

'f' + "oo" # => "foo"

def +(other : Int) : Char #

Returns a char that has this char's codepoint plus other.

'a' + 1 # => 'b'
'a' + 2 # => 'c'

def -(other : Char) #

Returns the difference of the codepoint values of this char and other.

'a' - 'a' # => 0
'b' - 'a' # => 1
'c' - 'a' # => 2

def -(other : Int) : Char #

Returns a char that has this char's codepoint minus other.

'c' - 1 # => 'b'
'c' - 2 # => 'a'

def <(other : Char) : Bool #

Returns true if self's codepoint is less than other's codepoint.


def <=(other : Char) : Bool #

Returns true if self's codepoint is less than or equal to other's codepoint.


def <=>(other : Char) #

Implements the comparison operator.

'a' <=> 'c' # => -2

def ==(other : Char) : Bool #

Returns true if self's codepoint is equal to other's codepoint.


def ===(byte : Int) #

Returns true if the codepoint is equal to byte ignoring the type.

'c'.ord       # => 99
'c' === 99_u8 # => true
'c' === 99    # => true
'z' === 99    # => false

def >(other : Char) : Bool #

Returns true if self's codepoint is greater than other's codepoint.


def >=(other : Char) : Bool #

Returns true if self's codepoint is greater than or equal to other's codepoint.


def alpha? #

DEPRECATED use #ascii_letter? or #letter?. This method will be removed after 0.20.0.


def alphanumeric? #

Returns true if this char is a letter or a number according to unicode.

'c'.alphanumeric? # => true
'8'.alphanumeric? # => true
'.'.alphanumeric? # => false

def ascii? #

Returns true if this char is an ASCII character (codepoint is in (0..127))


def ascii_alphanumeric? #

Returns true if this char is an ASCII letter or number ('0' to '9', 'a' to 'z', 'A' to 'Z').

'c'.ascii_alphanumeric? # => true
'8'.ascii_alphanumeric? # => true
'.'.ascii_alphanumeric? # => false

def ascii_control? #

Returns true if this char is an ASCII control character.

('\u0000'..'\u0019').each do |char|
  char.control? # => true
end

('\u007F'..'\u009F').each do |char|
  char.control? # => true
end

def ascii_letter? #

Returns true if this char is an ASCII letter ('a' to 'z', 'A' to 'Z').

'c'.ascii_letter? # => true
'á'.ascii_letter? # => false
'8'.ascii_letter? # => false

def ascii_lowercase? #

Returns true if this char is a lowercase ASCII letter.

'c'.ascii_lowercase? # => true
'ç'.ascii_lowercase? # => false
'G'.ascii_lowercase? # => false
'.'.ascii_lowercase? # => false

def ascii_number?(base : Int = 10) #

Returns true if this char is an ASCII number in specified base.

Base can be from 0 to 36 with digits from '0' to '9' and 'a' to 'z' or 'A' to 'Z'.

'4'.ascii_number?     # => true
'z'.ascii_number?     # => false
'z'.ascii_number?(36) # => true

def ascii_uppercase? #

Returns true if this char is an ASCII uppercase letter.

'H'.ascii_uppercase? # => true
'Á'.ascii_uppercase? # => false
'c'.ascii_uppercase? # => false
'.'.ascii_uppercase? # => false

def ascii_whitespace? #

Returns true if this char is an ASCII whitespace.

' '.ascii_whitespace?  # => true
'\t'.ascii_whitespace? # => true
'b'.ascii_whitespace?  # => false

def bytes #

Returns this char's bytes as encoded by UTF-8, as an Array(UInt8).

'a'.bytes # => [97]
'あ'.bytes # => [227, 129, 130]

def bytesize #

Returns the number of UTF-8 bytes in this char.

'a'.bytesize # => 1
'好'.bytesize # => 3

def clone #

def control? #

Returns true if this char is a control character according to unicode.


def digit?(base : Int = 10) #

DEPRECATED use #ascii_number? or #number?. This method will be removed after 0.20.0.


def downcase(options = Unicode::CaseOptions::None) #

Returns the downcase equivalent of this char.

Note that this only works for characters whose downcase equivalent yields a single codepoint. There are a few characters, like 'İ', that when downcased result in multiple characters (in this case: 'i' and the dot mark).

For a more correct method see the method that receives a block.

'Z'.downcase # => 'z'
'x'.downcase # => 'x'
'.'.downcase # => '.'

def downcase(options = Unicode::CaseOptions::None, &block) #

Yields each char for the downcase equivalent of this char.

This method takes into account the possibility that a downcase version of a char might result in multiple chars, like for 'İ', which results in 'i' and a dot mark.


def dump(io) #

Appends this char as a string that contains a char literal to the given IO.

See #dump.


def dump #

Returns this char as a string that contains a char literal as written in Crystal, with characters with a codepoint greater than 0x7F written as \u{...}.

'a'.dump      # => "'a'"
'\t'.dump     # => "'\t'"
'あ'.dump      # => "'\u{3042}'"
'\u0012'.dump # => "'\u{12}'"

def each_byte(&block) #

Yields each of the bytes of this char as encoded by UTF-8.

puts "'a'"
'a'.each_byte do |byte|
  puts byte
end
puts

puts "'あ'"
'あ'.each_byte do |byte|
  puts byte
end

Output:

'a'
97

'あ'
227
129
130

def hash #

Returns this char's codepoint.


def hex? #

Returns true if this char is an ASCII hex digit ('0' to '9', 'a' to 'f', 'A' to 'F').

'5'.hex? # => true
'a'.hex? # => true
'F'.hex? # => true
'g'.hex? # => false

def in_set?(*sets : String) #

Returns true if this char is matched by the given sets.

Each parameter defines a set, the character is matched against the intersection of those, in other words it needs to match all sets.

If a set starts with a ^, it is negated. The sequence c1-c2 means all characters between and including c1 and c2 and is known as a range.

The backslash character \ can be used to escape ^ or - and is otherwise ignored unless it appears at the end of a range or the end of a set.

'l'.in_set? "lo"          # => true
'l'.in_set? "lo", "o"     # => false
'l'.in_set? "hello", "^l" # => false
'l'.in_set? "j-m"         # => true

'^'.in_set? "\\^aeiou" # => true
'-'.in_set? "a\\-eo"   # => true

'\\'.in_set? "\\"    # => true
'\\'.in_set? "\\A"   # => false
'\\'.in_set? "X-\\w" # => true

def inspect(io) #

Appends this char as a string that contains a char literal to the given IO.

See #inspect.


def inspect #

Returns this char as a string that contains a char literal.

'a'.inspect      # => "'a'"
'\t'.inspect     # => "'\t'"
'あ'.inspect      # => "'あ'"
'\u0012'.inspect # => "'\u{12}'"

def letter? #

Returns true if this char is a letter.

'c'.letter? # => true
'á'.letter? # => true
'8'.letter? # => false

def lowercase? #

Returns true if this char is a lowercase letter.

'c'.lowercase? # => true
'ç'.lowercase? # => true
'G'.lowercase? # => false
'.'.lowercase? # => false

def mark? #

Returns true if this char is a mark character according to Unicode.


def number? #

Returns true if this char is a number according to unicode.

'1'.number? # => true
'a'.number? # => false

def ord : Int32 #

Returns the codepoint of this char.

The codepoint is the integer representation. The Universal Coded Character Set (UCS) standard, commonly known as Unicode, assigns names and meanings to numbers; these numbers are called codepoints.

For values below and including 127 this matches the ASCII codes and thus its byte representation.

'a'.ord      # => 97
'\0'.ord     # => 0
'\u007f'.ord # => 127
'☃'.ord      # => 9731

def pred #

Returns a Char that is one codepoint smaller than this char's codepoint.

'b'.pred # => 'a'
'ぃ'.pred # => 'あ'

def succ #

Returns a Char that is one codepoint bigger than this char's codepoint.

'a'.succ # => 'b'
'あ'.succ # => 'ぃ'

This method allows creating a Range of chars.


def to_f #

Returns the integer value of this char as a float if it's an ASCII char denoting a digit, raises otherwise.

'1'.to_f # => 1.0
'8'.to_f # => 8.0
'c'.to_f # => ArgumentError

def to_f32 #

See #to_f


def to_f32? #

See #to_f?


def to_f64 #

Same as #to_f


def to_f64? #

Same as #to_f?


def to_f? #

Returns the integer value of this char as a float if it's an ASCII char denoting a digit, nil otherwise.

'1'.to_f? # => 1.0
'8'.to_f? # => 8.0
'c'.to_f? # => nil

def to_i(base : Int = 10) : Int32 #

Returns the integer value of this char if it's an ASCII char denoting a digit in base, raises otherwise.

'1'.to_i     # => 1
'8'.to_i     # => 8
'c'.to_i     # => ArgumentError
'1'.to_i(16) # => 1
'a'.to_i(16) # => 10
'f'.to_i(16) # => 15
'z'.to_i(16) # => ArgumentError

def to_i16(base : Int = 10) #

See #to_i


def to_i16?(base : Int = 10) #

See #to_i?


def to_i32(base : Int = 10) : Int32 #

Same as #to_i


def to_i32?(base : Int = 10) : Int32? #

Same as #to_i?


def to_i64(base : Int = 10) #

See #to_i


def to_i64?(base : Int = 10) #

See #to_i?


def to_i8(base : Int = 10) #

See #to_i


def to_i8?(base : Int = 10) #

See #to_i?


def to_i?(base : Int = 10) : Int32? #

Returns the integer value of this char if it's an ASCII char denoting a digit in base, nil otherwise.

'1'.to_i?     # => 1
'8'.to_i?     # => 8
'c'.to_i?     # => nil
'1'.to_i?(16) # => 1
'a'.to_i?(16) # => 10
'f'.to_i?(16) # => 15
'z'.to_i?(16) # => nil

def to_s(io : IO) #

Appends this char to the given IO.

This appends this char's bytes as encoded by UTF-8 to the given IO.


def to_s #

Returns this char as a string containing this char as a single character.

'a'.to_s # => "a"
'あ'.to_s # => "あ"

def to_u16(base : Int = 10) #

See #to_i


def to_u16?(base : Int = 10) #

See #to_i?


def to_u32(base : Int = 10) #

See #to_i


def to_u32?(base : Int = 10) #

See #to_i?


def to_u64(base : Int = 10) #

See #to_i


def to_u64?(base : Int = 10) #

See #to_i?


def to_u8(base : Int = 10) #

See #to_i


def to_u8?(base : Int = 10) #

See #to_i?


def upcase(options = Unicode::CaseOptions::None, &block) #

Yields each char for the upcase equivalent of this char.

This method takes into account the possibility that an upcase version of a char might result in multiple chars, like for 'ffl', which results in 'F', 'F' and 'L'.

'z'.upcase { |v| puts v } # prints 'Z'
'ffl'.upcase { |v| puts v } # prints 'F', 'F', 'L'

def upcase(options = Unicode::CaseOptions::None) #

Returns the upcase equivalent of this char.

Note that this only works for characters whose upcase equivalent yields a single codepoint. There are a few characters, like 'ffl', that when upcased result in multiple characters (in this case: 'F', 'F', 'L').

For a more correct method see the method that receives a block.

'z'.upcase # => 'Z'
'X'.upcase # => 'X'
'.'.upcase # => '.'

def uppercase? #

Returns true if this char is an uppercase letter.

'H'.uppercase? # => true
'Á'.uppercase? # => true
'c'.uppercase? # => false
'.'.uppercase? # => false

def whitespace? #

Returns true if this char is a whitespace according to unicode.

' '.whitespace?  # => true
'\t'.whitespace? # => true
'b'.whitespace?  # => false