SPECIALS |
= |
[ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] |
|
The order in which the substitutions occur
|
SUBSTITUTES |
= |
['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'] |
SLAICEPS |
= |
[ '<', '>', '"', "'", '&' ] |
|
Characters which are substituted in written strings
|
SETUTITSBUS |
= |
[ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ] |
NEEDS_A_SECOND_CHECK |
= |
/(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um |
NUMERICENTITY |
= |
/&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ |
VALID_CHAR |
= |
[ 0x9, 0xA, 0xD, (0x20..0xD7FF), (0xE000..0xFFFD), (0x10000..0x10FFFF) ] |
VALID_XML_CHARS |
= |
Regexp.new('^['+ VALID_CHAR.map { |item| case item |
VALID_XML_CHARS |
= |
/^(
     [\x09\x0A\x0D\x20-\x7E]            # ASCII
   | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
   |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
   | [\xE1-\xEC\xEE][\x80-\xBF]{2}      # straight 3-byte
   |  \xEF[\x80-\xBE]{2}                #
   |  \xEF\xBF[\x80-\xBD]               # excluding U+fffe and U+ffff
   |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
   |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
   | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
   |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
 )*$/x; |