Introduction
Introduction Statistics Contact Development Disclaimer Help
trstr: faster searching for non-regex patterns - neatvi - [fork] simple vi-type…
git clone git://src.adamsgaard.dk/neatvi
Log
Files
Refs
README
---
commit 2c0f3f357bcd4446492330f2313464c3c6348343
parent 7df341b1420aeeae6a9da53a5209762eda00341d
Author: Ali Gholami Rudi <[email protected]>
Date: Sat, 20 Nov 2021 21:57:26 +0330
rstr: faster searching for non-regex patterns
Diffstat:
M Makefile | 2 +-
M ex.c | 34 +++++++++++++++--------------…
M mot.c | 8 ++++----
A rstr.c | 115 +++++++++++++++++++++++++++++…
M vi.h | 8 ++++++--
5 files changed, 142 insertions(+), 25 deletions(-)
---
diff --git a/Makefile b/Makefile
t@@ -3,7 +3,7 @@ CFLAGS = -Wall -O2
LDFLAGS =
OBJS = vi.o ex.o lbuf.o mot.o sbuf.o ren.o dir.o syn.o reg.o led.o \
- uc.o term.o rset.o regex.o cmd.o conf.o
+ uc.o term.o rset.o rstr.o regex.o cmd.o conf.o
all: vi
diff --git a/ex.c b/ex.c
t@@ -167,8 +167,8 @@ static int ex_search(char **pat)
struct sbuf *kw;
char *b = *pat;
char *e = b;
- char *pats[1];
- struct rset *re;
+ char *pat_re;
+ struct rstr *re;
int dir, row;
kw = sbuf_make();
while (*++e) {
t@@ -182,18 +182,18 @@ static int ex_search(char **pat)
ex_kwdset(sbuf_buf(kw), **pat == '/' ? 1 : -1);
sbuf_free(kw);
*pat = *e ? e + 1 : e;
- if (ex_kwd(&pats[0], &dir))
+ if (ex_kwd(&pat_re, &dir))
return -1;
- re = rset_make(1, pats, xic ? RE_ICASE : 0);
+ re = rstr_make(pat_re, xic ? RE_ICASE : 0);
if (!re)
return -1;
row = xrow + dir;
while (row >= 0 && row < lbuf_len(xb)) {
- if (rset_find(re, lbuf_get(xb, row), 0, NULL, 0) >= 0)
+ if (rstr_find(re, lbuf_get(xb, row), 0, NULL, 0) >= 0)
break;
row += dir;
}
- rset_free(re);
+ rstr_free(re);
return row >= 0 && row < lbuf_len(xb) ? row : -1;
}
t@@ -604,10 +604,9 @@ static void replace(struct sbuf *dst, char *rep, char *ln…
static int ec_substitute(char *loc, char *cmd, char *arg)
{
- struct rset *re;
+ struct rstr *re;
int offs[32];
int beg, end;
- char *pats[1];
char *pat = NULL, *rep = NULL;
char *s = arg;
int i;
t@@ -624,15 +623,15 @@ static int ec_substitute(char *loc, char *cmd, char *arg)
snprintf(xrep, sizeof(xrep), "%s", rep ? rep : "");
free(pat);
free(rep);
- if (ex_kwd(&pats[0], NULL))
+ if (ex_kwd(&pat, NULL))
return 1;
- re = rset_make(1, pats, xic ? RE_ICASE : 0);
+ re = rstr_make(pat, xic ? RE_ICASE : 0);
if (!re)
return 1;
for (i = beg; i < end; i++) {
char *ln = lbuf_get(xb, i);
struct sbuf *r = NULL;
- while (rset_find(re, ln, LEN(offs) / 2, offs, 0) >= 0) {
+ while (rstr_find(re, ln, LEN(offs) / 2, offs, 0) >= 0) {
if (!r)
r = sbuf_make();
sbuf_mem(r, ln, offs[0]);
t@@ -649,7 +648,7 @@ static int ec_substitute(char *loc, char *cmd, char *arg)
sbuf_free(r);
}
}
- rset_free(re);
+ rstr_free(re);
return 0;
}
t@@ -716,10 +715,9 @@ static int ex_exec(char *ln);
static int ec_glob(char *loc, char *cmd, char *arg)
{
- struct rset *re;
+ struct rstr *re;
int offs[32];
int beg, end, not;
- char *pats[1];
char *pat;
char *s = arg;
int i;
t@@ -732,9 +730,9 @@ static int ec_glob(char *loc, char *cmd, char *arg)
if (pat && pat[0])
ex_kwdset(pat, +1);
free(pat);
- if (ex_kwd(&pats[0], NULL))
+ if (ex_kwd(&pat, NULL))
return 1;
- if (!(re = rset_make(1, pats, xic ? RE_ICASE : 0)))
+ if (!(re = rstr_make(pat, xic ? RE_ICASE : 0)))
return 1;
xgdep++;
for (i = beg + 1; i < end; i++)
t@@ -742,7 +740,7 @@ static int ec_glob(char *loc, char *cmd, char *arg)
i = beg;
while (i < lbuf_len(xb)) {
char *ln = lbuf_get(xb, i);
- if ((rset_find(re, ln, LEN(offs) / 2, offs, 0) < 0) == not) {
+ if ((rstr_find(re, ln, LEN(offs) / 2, offs, 0) < 0) == not) {
xrow = i;
if (ex_exec(s))
break;
t@@ -754,7 +752,7 @@ static int ec_glob(char *loc, char *cmd, char *arg)
for (i = 0; i < lbuf_len(xb); i++)
lbuf_globget(xb, i, xgdep);
xgdep--;
- rset_free(re);
+ rstr_free(re);
return 0;
}
diff --git a/mot.c b/mot.c
t@@ -55,13 +55,13 @@ int lbuf_search(struct lbuf *lb, char *kw, int dir, int *r…
int found = 0;
int r0 = *r, o0 = *o;
int i;
- struct rset *re = rset_make(1, &kw, xic ? RE_ICASE : 0);
+ struct rstr *re = rstr_make(kw, xic ? RE_ICASE : 0);
if (!re)
return 1;
for (i = r0; !found && i >= 0 && i < lbuf_len(lb); i += dir) {
char *s = lbuf_get(lb, i);
int off = dir > 0 && r0 == i ? uc_chr(s, o0 + 1) - s : 0;
- while (rset_find(re, s + off, 1, offs,
+ while (rstr_find(re, s + off, 1, offs,
off ? RE_NOTBOL : 0) >= 0) {
if (dir < 0 && r0 == i &&
uc_off(s, off + offs[0]) >= o0)
t@@ -71,11 +71,11 @@ int lbuf_search(struct lbuf *lb, char *kw, int dir, int *r…
*r = i;
*len = uc_off(s + off + offs[0], offs[1] - offs[0]);
off += offs[1] > offs[0] ? offs[1] : offs[1] + 1;
- if (dir > 0)
+ if (dir > 0 || !s[off])
break;
}
}
- rset_free(re);
+ rstr_free(re);
return !found;
}
diff --git a/rstr.c b/rstr.c
t@@ -0,0 +1,115 @@
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "vi.h"
+
+struct rstr {
+ struct rset *rs; /* the compiled regular expression */
+ char *str; /* simple search string */
+ int icase;
+ int lbeg, lend;
+ int wbeg, wend;
+};
+
+static int rstr_simple(struct rstr *rs, char *re)
+{
+ char *beg;
+ char *end;
+ rs->lbeg = re[0] == '^';
+ if (rs->lbeg)
+ re++;
+ rs->wbeg = re[0] == '\\' && re[1] == '<';
+ if (rs->wbeg)
+ re += 2;
+ beg = re;
+ while (re[0] && !strchr("\\.*+?[]{}()$", (unsigned char) re[0]))
+ re++;
+ end = re;
+ rs->wend = re[0] == '\\' && re[1] == '>';
+ if (rs->wend)
+ re += 2;
+ rs->lend = re[0] == '$';
+ if (rs->lend)
+ re++;
+ if (!re[0]) {
+ int len = end - beg;
+ rs->str = malloc(len + 1);
+ memcpy(rs->str, beg, len);
+ rs->str[len] = '\0';
+ return 0;
+ }
+ return 1;
+}
+
+struct rstr *rstr_make(char *re, int flg)
+{
+ struct rstr *rs = malloc(sizeof(*rs));
+ memset(rs, 0, sizeof(*rs));
+ rs->icase = flg & RE_ICASE;
+ if (rstr_simple(rs, re))
+ rs->rs = rset_make(1, &re, flg);
+ if (!rs->rs && !rs->str) {
+ free(rs);
+ return NULL;
+ }
+ return rs;
+}
+
+static int isword(char *s)
+{
+ int c = (unsigned char) s[0];
+ return isalnum(c) || c == '_' || c > 127;
+}
+
+static int match_case(char *s, char *r, int icase)
+{
+ for (; *r && *s; s++, r++) {
+ if (!icase && *s != *r)
+ return 1;
+ if (icase && tolower((unsigned char) *s) != tolower((unsigned …
+ return 1;
+ }
+ return 0;
+}
+
+/* return zero if an occurrence is found */
+int rstr_find(struct rstr *rs, char *s, int n, int *grps, int flg)
+{
+ int len;
+ char *beg, *end;
+ char *r;
+ if (rs->rs)
+ return rset_find(rs->rs, s, n, grps, flg);
+ if ((rs->lbeg && (flg & RE_NOTBOL)) || (rs->lend && (flg & RE_NOTEOL)))
+ return -1;
+ len = strlen(rs->str);
+ beg = s;
+ end = s + strlen(s) - len - 1;
+ if (rs->lbeg)
+ end = beg;
+ if (rs->lend)
+ beg = end;
+ for (r = beg; r <= end; r++) {
+ if (rs->wbeg && r > s && (isword(r - 1) || !isword(r)))
+ continue;
+ if (rs->wend && r[len] && (!isword(r + len - 1) || isword(r + …
+ continue;
+ if (!match_case(r, rs->str, rs->icase)) {
+ if (n >= 1) {
+ grps[0] = r - s;
+ grps[1] = r - s + len;
+ }
+ return 0;
+ }
+ }
+ return -1;
+}
+
+void rstr_free(struct rstr *rs)
+{
+ if (rs->rs)
+ rset_free(rs->rs);
+ free(rs->str);
+ free(rs);
+}
diff --git a/vi.h b/vi.h
t@@ -45,15 +45,19 @@ void sbuf_printf(struct sbuf *sbuf, char *s, ...);
int sbuf_len(struct sbuf *sb);
void sbuf_cut(struct sbuf *s, int len);
-/* regular expression sets */
+/* regular expressions */
#define RE_ICASE 1
#define RE_NOTBOL 2
#define RE_NOTEOL 4
-
+/* regular expression sets: searching for multiple regular expressions */
struct rset *rset_make(int n, char **pat, int flg);
int rset_find(struct rset *re, char *s, int n, int *grps, int flg);
void rset_free(struct rset *re);
char *re_read(char **src);
+/* searching for a single pattern regular expression */
+struct rstr *rstr_make(char *re, int flg);
+int rstr_find(struct rstr *rs, char *s, int n, int *grps, int flg);
+void rstr_free(struct rstr *rs);
/* rendering lines */
int *ren_position(char *s);
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.