rstr: faster searching for non-regex patterns - neatvi - [fork] simple vi-type… | |
git clone git://src.adamsgaard.dk/neatvi | |
Log | |
Files | |
Refs | |
README | |
--- | |
commit 2c0f3f357bcd4446492330f2313464c3c6348343 | |
parent 7df341b1420aeeae6a9da53a5209762eda00341d | |
Author: Ali Gholami Rudi <[email protected]> | |
Date: Sat, 20 Nov 2021 21:57:26 +0330 | |
rstr: faster searching for non-regex patterns | |
Diffstat: | |
M Makefile | 2 +- | |
M ex.c | 34 +++++++++++++++--------------… | |
M mot.c | 8 ++++---- | |
A rstr.c | 115 +++++++++++++++++++++++++++++… | |
M vi.h | 8 ++++++-- | |
5 files changed, 142 insertions(+), 25 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -3,7 +3,7 @@ CFLAGS = -Wall -O2 | |
LDFLAGS = | |
OBJS = vi.o ex.o lbuf.o mot.o sbuf.o ren.o dir.o syn.o reg.o led.o \ | |
- uc.o term.o rset.o regex.o cmd.o conf.o | |
+ uc.o term.o rset.o rstr.o regex.o cmd.o conf.o | |
all: vi | |
diff --git a/ex.c b/ex.c | |
@@ -167,8 +167,8 @@ static int ex_search(char **pat) | |
struct sbuf *kw; | |
char *b = *pat; | |
char *e = b; | |
- char *pats[1]; | |
- struct rset *re; | |
+ char *pat_re; | |
+ struct rstr *re; | |
int dir, row; | |
kw = sbuf_make(); | |
while (*++e) { | |
@@ -182,18 +182,18 @@ static int ex_search(char **pat) | |
ex_kwdset(sbuf_buf(kw), **pat == '/' ? 1 : -1); | |
sbuf_free(kw); | |
*pat = *e ? e + 1 : e; | |
- if (ex_kwd(&pats[0], &dir)) | |
+ if (ex_kwd(&pat_re, &dir)) | |
return -1; | |
- re = rset_make(1, pats, xic ? RE_ICASE : 0); | |
+ re = rstr_make(pat_re, xic ? RE_ICASE : 0); | |
if (!re) | |
return -1; | |
row = xrow + dir; | |
while (row >= 0 && row < lbuf_len(xb)) { | |
- if (rset_find(re, lbuf_get(xb, row), 0, NULL, 0) >= 0) | |
+ if (rstr_find(re, lbuf_get(xb, row), 0, NULL, 0) >= 0) | |
break; | |
row += dir; | |
} | |
- rset_free(re); | |
+ rstr_free(re); | |
return row >= 0 && row < lbuf_len(xb) ? row : -1; | |
} | |
@@ -604,10 +604,9 @@ static void replace(struct sbuf *dst, char *rep, char *ln… | |
static int ec_substitute(char *loc, char *cmd, char *arg) | |
{ | |
- struct rset *re; | |
+ struct rstr *re; | |
int offs[32]; | |
int beg, end; | |
- char *pats[1]; | |
char *pat = NULL, *rep = NULL; | |
char *s = arg; | |
int i; | |
@@ -624,15 +623,15 @@ static int ec_substitute(char *loc, char *cmd, char *arg) | |
snprintf(xrep, sizeof(xrep), "%s", rep ? rep : ""); | |
free(pat); | |
free(rep); | |
- if (ex_kwd(&pats[0], NULL)) | |
+ if (ex_kwd(&pat, NULL)) | |
return 1; | |
- re = rset_make(1, pats, xic ? RE_ICASE : 0); | |
+ re = rstr_make(pat, xic ? RE_ICASE : 0); | |
if (!re) | |
return 1; | |
for (i = beg; i < end; i++) { | |
char *ln = lbuf_get(xb, i); | |
struct sbuf *r = NULL; | |
- while (rset_find(re, ln, LEN(offs) / 2, offs, 0) >= 0) { | |
+ while (rstr_find(re, ln, LEN(offs) / 2, offs, 0) >= 0) { | |
if (!r) | |
r = sbuf_make(); | |
sbuf_mem(r, ln, offs[0]); | |
@@ -649,7 +648,7 @@ static int ec_substitute(char *loc, char *cmd, char *arg) | |
sbuf_free(r); | |
} | |
} | |
- rset_free(re); | |
+ rstr_free(re); | |
return 0; | |
} | |
@@ -716,10 +715,9 @@ static int ex_exec(char *ln); | |
static int ec_glob(char *loc, char *cmd, char *arg) | |
{ | |
- struct rset *re; | |
+ struct rstr *re; | |
int offs[32]; | |
int beg, end, not; | |
- char *pats[1]; | |
char *pat; | |
char *s = arg; | |
int i; | |
@@ -732,9 +730,9 @@ static int ec_glob(char *loc, char *cmd, char *arg) | |
if (pat && pat[0]) | |
ex_kwdset(pat, +1); | |
free(pat); | |
- if (ex_kwd(&pats[0], NULL)) | |
+ if (ex_kwd(&pat, NULL)) | |
return 1; | |
- if (!(re = rset_make(1, pats, xic ? RE_ICASE : 0))) | |
+ if (!(re = rstr_make(pat, xic ? RE_ICASE : 0))) | |
return 1; | |
xgdep++; | |
for (i = beg + 1; i < end; i++) | |
@@ -742,7 +740,7 @@ static int ec_glob(char *loc, char *cmd, char *arg) | |
i = beg; | |
while (i < lbuf_len(xb)) { | |
char *ln = lbuf_get(xb, i); | |
- if ((rset_find(re, ln, LEN(offs) / 2, offs, 0) < 0) == not) { | |
+ if ((rstr_find(re, ln, LEN(offs) / 2, offs, 0) < 0) == not) { | |
xrow = i; | |
if (ex_exec(s)) | |
break; | |
@@ -754,7 +752,7 @@ static int ec_glob(char *loc, char *cmd, char *arg) | |
for (i = 0; i < lbuf_len(xb); i++) | |
lbuf_globget(xb, i, xgdep); | |
xgdep--; | |
- rset_free(re); | |
+ rstr_free(re); | |
return 0; | |
} | |
diff --git a/mot.c b/mot.c | |
@@ -55,13 +55,13 @@ int lbuf_search(struct lbuf *lb, char *kw, int dir, int *r… | |
int found = 0; | |
int r0 = *r, o0 = *o; | |
int i; | |
- struct rset *re = rset_make(1, &kw, xic ? RE_ICASE : 0); | |
+ struct rstr *re = rstr_make(kw, xic ? RE_ICASE : 0); | |
if (!re) | |
return 1; | |
for (i = r0; !found && i >= 0 && i < lbuf_len(lb); i += dir) { | |
char *s = lbuf_get(lb, i); | |
int off = dir > 0 && r0 == i ? uc_chr(s, o0 + 1) - s : 0; | |
- while (rset_find(re, s + off, 1, offs, | |
+ while (rstr_find(re, s + off, 1, offs, | |
off ? RE_NOTBOL : 0) >= 0) { | |
if (dir < 0 && r0 == i && | |
uc_off(s, off + offs[0]) >= o0) | |
@@ -71,11 +71,11 @@ int lbuf_search(struct lbuf *lb, char *kw, int dir, int *r… | |
*r = i; | |
*len = uc_off(s + off + offs[0], offs[1] - offs[0]); | |
off += offs[1] > offs[0] ? offs[1] : offs[1] + 1; | |
- if (dir > 0) | |
+ if (dir > 0 || !s[off]) | |
break; | |
} | |
} | |
- rset_free(re); | |
+ rstr_free(re); | |
return !found; | |
} | |
diff --git a/rstr.c b/rstr.c | |
@@ -0,0 +1,115 @@ | |
+#include <ctype.h> | |
+#include <stdlib.h> | |
+#include <stdio.h> | |
+#include <string.h> | |
+#include "vi.h" | |
+ | |
+struct rstr { | |
+ struct rset *rs; /* the compiled regular expression */ | |
+ char *str; /* simple search string */ | |
+ int icase; | |
+ int lbeg, lend; | |
+ int wbeg, wend; | |
+}; | |
+ | |
+static int rstr_simple(struct rstr *rs, char *re) | |
+{ | |
+ char *beg; | |
+ char *end; | |
+ rs->lbeg = re[0] == '^'; | |
+ if (rs->lbeg) | |
+ re++; | |
+ rs->wbeg = re[0] == '\\' && re[1] == '<'; | |
+ if (rs->wbeg) | |
+ re += 2; | |
+ beg = re; | |
+ while (re[0] && !strchr("\\.*+?[]{}()$", (unsigned char) re[0])) | |
+ re++; | |
+ end = re; | |
+ rs->wend = re[0] == '\\' && re[1] == '>'; | |
+ if (rs->wend) | |
+ re += 2; | |
+ rs->lend = re[0] == '$'; | |
+ if (rs->lend) | |
+ re++; | |
+ if (!re[0]) { | |
+ int len = end - beg; | |
+ rs->str = malloc(len + 1); | |
+ memcpy(rs->str, beg, len); | |
+ rs->str[len] = '\0'; | |
+ return 0; | |
+ } | |
+ return 1; | |
+} | |
+ | |
+struct rstr *rstr_make(char *re, int flg) | |
+{ | |
+ struct rstr *rs = malloc(sizeof(*rs)); | |
+ memset(rs, 0, sizeof(*rs)); | |
+ rs->icase = flg & RE_ICASE; | |
+ if (rstr_simple(rs, re)) | |
+ rs->rs = rset_make(1, &re, flg); | |
+ if (!rs->rs && !rs->str) { | |
+ free(rs); | |
+ return NULL; | |
+ } | |
+ return rs; | |
+} | |
+ | |
+static int isword(char *s) | |
+{ | |
+ int c = (unsigned char) s[0]; | |
+ return isalnum(c) || c == '_' || c > 127; | |
+} | |
+ | |
+static int match_case(char *s, char *r, int icase) | |
+{ | |
+ for (; *r && *s; s++, r++) { | |
+ if (!icase && *s != *r) | |
+ return 1; | |
+ if (icase && tolower((unsigned char) *s) != tolower((unsigned … | |
+ return 1; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* return zero if an occurrence is found */ | |
+int rstr_find(struct rstr *rs, char *s, int n, int *grps, int flg) | |
+{ | |
+ int len; | |
+ char *beg, *end; | |
+ char *r; | |
+ if (rs->rs) | |
+ return rset_find(rs->rs, s, n, grps, flg); | |
+ if ((rs->lbeg && (flg & RE_NOTBOL)) || (rs->lend && (flg & RE_NOTEOL))) | |
+ return -1; | |
+ len = strlen(rs->str); | |
+ beg = s; | |
+ end = s + strlen(s) - len - 1; | |
+ if (rs->lbeg) | |
+ end = beg; | |
+ if (rs->lend) | |
+ beg = end; | |
+ for (r = beg; r <= end; r++) { | |
+ if (rs->wbeg && r > s && (isword(r - 1) || !isword(r))) | |
+ continue; | |
+ if (rs->wend && r[len] && (!isword(r + len - 1) || isword(r + … | |
+ continue; | |
+ if (!match_case(r, rs->str, rs->icase)) { | |
+ if (n >= 1) { | |
+ grps[0] = r - s; | |
+ grps[1] = r - s + len; | |
+ } | |
+ return 0; | |
+ } | |
+ } | |
+ return -1; | |
+} | |
+ | |
+void rstr_free(struct rstr *rs) | |
+{ | |
+ if (rs->rs) | |
+ rset_free(rs->rs); | |
+ free(rs->str); | |
+ free(rs); | |
+} | |
diff --git a/vi.h b/vi.h | |
@@ -45,15 +45,19 @@ void sbuf_printf(struct sbuf *sbuf, char *s, ...); | |
int sbuf_len(struct sbuf *sb); | |
void sbuf_cut(struct sbuf *s, int len); | |
-/* regular expression sets */ | |
+/* regular expressions */ | |
#define RE_ICASE 1 | |
#define RE_NOTBOL 2 | |
#define RE_NOTEOL 4 | |
- | |
+/* regular expression sets: searching for multiple regular expressions */ | |
struct rset *rset_make(int n, char **pat, int flg); | |
int rset_find(struct rset *re, char *s, int n, int *grps, int flg); | |
void rset_free(struct rset *re); | |
char *re_read(char **src); | |
+/* searching for a single pattern regular expression */ | |
+struct rstr *rstr_make(char *re, int flg); | |
+int rstr_find(struct rstr *rs, char *s, int n, int *grps, int flg); | |
+void rstr_free(struct rstr *rs); | |
/* rendering lines */ | |
int *ren_position(char *s); |