-- ************************************************************************
--
-- Filter out control characters from text.
-- Copyright 2018 by Sean Conner. All Rights Reserved.
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program. If not, see <http://www.gnu.org/licenses/>.
--
-- Comments, questions and criticisms can be sent to: [email protected]
--
-- ************************************************************************
-- luacheck: ignore 611
local lpeg = require("lpeg") -- semver: ~1.0.0

-- Character parsers, combined as an ordered choice: the UTF-8 parser is
-- tried first, then the ASCII one.
local utf8_char  = require("org.conman.parsers.utf8.char")
local ascii_char = require("org.conman.parsers.ascii.char")
local utf8       = utf8_char + ascii_char

-- Control-sequence parsers, tried in the order UTF-8, ISO, ASCII.
local utf8_control  = require("org.conman.parsers.utf8.control")
local iso_control   = require("org.conman.parsers.iso.control")
local ascii_control = require("org.conman.parsers.ascii.control")
local control       = utf8_control + iso_control + ascii_control

-- Short names for the LPeg constructors used by the grammar.
local Carg, Cs, C, P, R = lpeg.Carg, lpeg.Cs, lpeg.C, lpeg.P, lpeg.R
-- Characters that are kept in the output but not counted for tabulation:
-- combining marks and invisible formatting characters.  Every alternative
-- is spelled as the decimal byte escapes of its UTF-8 encoding; the
-- "\u{...}" escape is avoided because only Lua 5.3 and later understand it,
-- and the byte form denotes exactly the same strings there.
local nc = P"\204"     * R"\128\191" -- U+0300-U+033F combining chars
         + P"\205"     * R"\128\175" -- U+0340-U+036F combining chars
         + P"\225\170" * R"\176\190" -- U+1AB0-U+1ABE combining chars
         + P"\225\183" * R"\128\191" -- U+1DC0-U+1DFF combining chars
         + P"\226\131" * R"\144\176" -- U+20D0-U+20F0 combining chars
         + P"\239\184" * R"\160\175" -- U+FE20-U+FE2F combining chars
         + P"\194\173"               -- U+00AD shy hyphen
         + P"\225\160\134"           -- U+1806 Mongolian TODO soft hyphen
         + P"\226\128\139"           -- U+200B zero width space
         + P"\226\128\140"           -- U+200C zero-width nonjoiner space
         + P"\226\128\141"           -- U+200D zero-width joiner space
-- Distance between tab stops, in columns.
local TAB_WIDTH = 8

-- Turn a horizontal tab into the run of spaces that reaches the next tab
-- stop, and move the column counter (state.cnt) forward by the same amount.
-- `state` is the first extra argument handed to the match.
local function expand_tab(state)
  local pad = TAB_WIDTH - (state.cnt % TAB_WIDTH)
  state.cnt = state.cnt + pad
  return string.rep(' ',pad)
end

-- Pass one character through unchanged and count it as a single column.
local function count_char(ch,state)
  state.cnt = state.cnt + 1
  return ch
end

local tab       = (P"\9" * Carg(1)) / expand_tab    -- HT
local printable = (C(utf8) * Carg(1)) / count_char  -- counted for tabulation

-- One unit of input.  The alternatives are tried in this order:
--   1. a tab, replaced by spaces
--   2. a control sequence, replaced by the empty string
--   3. a combining/zero-width character, kept but not counted for tabulation
--   4. any other character, kept and counted
--   5. any remaining single byte, replaced by the empty string
local c = tab
        + control / ""
        + nc
        + printable
        + P(1) / ""
-- The exported pattern.  It must be matched with one extra argument, a
-- table used as scratch state: its `cnt` field is set to 0 before the text
-- is processed and is updated as characters are counted.  A successful
-- match produces two values: the text rewritten by Cs(c^0), and the final
-- value of `cnt`.

-- Start counting from column zero; returns nothing, so it adds no capture.
local function reset(state)
  state.cnt = 0
end

-- Hand back the rewritten text together with the final column count.
local function finish(text,state)
  return text,state.cnt
end

return ((Carg(1) / reset) * Cs(c^0) * Carg(1)) / finish