Hi,
I'm trying to work out a way of making mg 8-bit clean. As I'm not
100% sure of the scope of the problem or of the solution, I've attached
a diff that tries to remove the implied (faked) '\n' character
from mg's buffers.
I'm taking the approach of loading the '\n' char from disk into the buffer,
instead of removing it at buffer load time (when opening a file) as mg
does at the moment. This diff only fixes about 20% of the ramifications of
doing that, so if you apply the diff mg will be very buggy afterwards;
the diff is only meant to open a discussion of the approach to going 8-bit
clean and how to achieve that. If removing the implied '\n' character
this way is deemed suitable then I can carry on cleaning up the other
80%, so that the main functionality of mg is not affected by each line
in a buffer ending in a 'real' \n rather than an implied or faked
one.
One consequence of having a \n at the end of a line is a change in how mg
works with the length of a line. Some functions need to know the new length
(+1 char) but others don't. For example, if you C-e to the end of
a line, currently mg leaves the cursor where a \n should be and reports
the cursor as being in column X. If you then M-x what-cursor-position, mg
fakes a \n character and tells you a \n is under the cursor.
With the attached diff, if you C-e to the end of a line, the function
called via C-e (gotoeol) puts the cursor at the offset given by the
displayed line length, which is 1 less than the 'real' length of the line,
which now includes a \n char. If you then M-x what-cursor-position,
mg will report the cursor is on a \n character because it _is_ on a \n
character.
I've introduced a display-length companion to llength():
#define llength(lp) ((lp)->l_used) /* real line length */
#define dis_llength(lp) (llength(lp) - 1) /* displayed line length */
However, is this approach the right way to progress 8-bit cleanness? I am
not sure, so please feel free to express your opinion. Also, is anyone
else working on this?
Mark
Index: basic.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/basic.c,v
retrieving revision 1.47
diff -u -p -r1.47 basic.c
--- basic.c 10 Oct 2015 09:13:14 -0000 1.47
+++ basic.c 15 Nov 2018 08:00:03 -0000
@@ -59,7 +59,7 @@ backchar(int f, int n)
return (FALSE);
}
curwp->w_dotp = lp;
- curwp->w_doto = llength(lp);
+ curwp->w_doto = dis_llength(lp);
curwp->w_rflag |= WFMOVE;
curwp->w_dotline--;
} else
@@ -78,7 +78,8 @@ gotoeol(int f, int n)
if (n == 0)
return (TRUE);
- curwp->w_doto = llength(curwp->w_dotp);
+ /* need to check if line is end of buffer and no '\n'*/
+ curwp->w_doto = dis_llength(curwp->w_dotp);
return (TRUE);
}
@@ -95,7 +96,7 @@ forwchar(int f, int n)
if (n < 0)
return (backchar(f, -n));
while (n--) {
- if (curwp->w_doto == llength(curwp->w_dotp)) {
+ if (curwp->w_doto == dis_llength(curwp->w_dotp)) {
curwp->w_dotp = lforw(curwp->w_dotp);
if (curwp->w_dotp == curbp->b_headp) {
curwp->w_dotp = lback(curwp->w_dotp);
@@ -142,7 +143,7 @@ gotoeob(int f, int n)
(void) setmark(f, n);
curwp->w_dotp = blastlp(curbp);
- curwp->w_doto = llength(curwp->w_dotp);
+ curwp->w_doto = llength(curwp->w_dotp);/* check no '\n'!! */
curwp->w_dotline = curwp->w_bufp->b_lines;
lp = curwp->w_dotp;
@@ -190,7 +191,8 @@ forwline(int f, int n)
dlp = lforw(dlp);
if (dlp == curbp->b_headp) {
curwp->w_dotp = lback(dlp);
- curwp->w_doto = llength(curwp->w_dotp);
+ /*check if end of buffer and no '\n'*/
+ curwp->w_doto = dis_llength(curwp->w_dotp);
curwp->w_rflag |= WFMOVE;
if (!(f & FFRAND)) {
dobeep();
@@ -275,7 +277,9 @@ getgoal(struct line *dlp)
for (i = 0; i < llength(dlp); i++) {
c = lgetc(dlp, i);
- if (c == '\t'
+ if (c == '\n')
+ ; /* check working */
+ else if (c == '\t'
#ifdef NOTAB
&& !(curbp->b_flag & BFNOTAB)
#endif
Index: def.h
===================================================================
RCS file: /cvs/src/usr.bin/mg/def.h,v
retrieving revision 1.156
diff -u -p -r1.156 def.h
--- def.h 29 Aug 2018 07:50:16 -0000 1.156
+++ def.h 15 Nov 2018 08:00:03 -0000
@@ -150,7 +150,8 @@ struct line {
#define lback(lp) ((lp)->l_bp)
#define lgetc(lp, n) (CHARMASK((lp)->l_text[(n)]))
#define lputc(lp, n, c) ((lp)->l_text[(n)]=(c))
-#define llength(lp) ((lp)->l_used)
+#define llength(lp) ((lp)->l_used) /* real line length */
+#define dis_llength(lp) (llength(lp) - 1) /* displayed line length */
#define ltext(lp) ((lp)->l_text)
/*
Index: display.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/display.c,v
retrieving revision 1.48
diff -u -p -r1.48 display.c
--- display.c 6 Jul 2017 19:27:37 -0000 1.48
+++ display.c 15 Nov 2018 08:00:03 -0000
@@ -319,6 +319,8 @@ vtputc(int c)
vp = vscreen[vtrow];
if (vtcol >= ncol)
vp->v_text[ncol - 1] = '$';
+ else if (c == '\n')
+ ; /* do not print newline to screen */
else if (c == '\t'
#ifdef NOTAB
&& !(curbp->b_flag & BFNOTAB)
@@ -355,6 +357,8 @@ vtpute(int c)
vp = vscreen[vtrow];
if (vtcol >= ncol)
vp->v_text[ncol - 1] = '$';
+ else if (c == '\n')
+ ;
else if (c == '\t'
#ifdef NOTAB
&& !(curbp->b_flag & BFNOTAB)
@@ -525,7 +529,9 @@ update(int modelinecolor)
) {
curcol |= 0x07;
curcol++;
- } else if (ISCTRL(c) != FALSE)
+ } else if (c == '\n')
+ ;
+ else if (ISCTRL(c) != FALSE)
curcol += 2;
else if (isprint(c))
curcol++;
Index: extend.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/extend.c,v
retrieving revision 1.64
diff -u -p -r1.64 extend.c
--- extend.c 1 Sep 2016 21:06:09 -0000 1.64
+++ extend.c 15 Nov 2018 08:00:03 -0000
@@ -926,7 +926,7 @@ cleanup:
static char *
skipwhite(char *s)
{
- while (*s == ' ' || *s == '\t' || *s == ')' || *s == '(')
+ while (*s == ' ' || *s == '\t' || *s == ')' || *s == '(' || *s == '\n')
s++;
if ((*s == ';') || (*s == '#'))
*s = '\0';
@@ -937,7 +937,8 @@ static char *
parsetoken(char *s)
{
if (*s != '"') {
- while (*s && *s != ' ' && *s != '\t' && *s != ')' && *s != '(')
+ while (*s && *s != ' ' && *s != '\t' && *s != ')' && *s != '('
+ && *s != '\n')
s++;
if (*s == ';')
*s = '\0';
Index: fileio.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/fileio.c,v
retrieving revision 1.105
diff -u -p -r1.105 fileio.c
--- fileio.c 13 Apr 2018 14:11:37 -0000 1.105
+++ fileio.c 15 Nov 2018 08:00:03 -0000
@@ -162,7 +162,7 @@ ffputbuf(FILE *ffp, struct buffer *bp)
return (FIOERR);
}
if (lforw(lp) != lpend) /* no implied \n on last line */
- putc('\n', ffp);
+ /*putc('\n', ffp)*/;
}
/*
* XXX should be variable controlled (once we have variables)
@@ -189,10 +189,12 @@ ffgetline(FILE *ffp, char *buf, int nbuf
int c, i;
i = 0;
- while ((c = getc(ffp)) != EOF && c != '\n') {
+ while ((c = getc(ffp)) != EOF/* && c != '\n'*/) {
buf[i++] = c;
if (i >= nbuf)
return (FIOLONG);
+ if (c == '\n')
+ break;
}
if (c == EOF && ferror(ffp) != FALSE) {
dobeep();
Index: line.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/line.c,v
retrieving revision 1.61
diff -u -p -r1.61 line.c
--- line.c 29 Aug 2018 07:50:16 -0000 1.61
+++ line.c 15 Nov 2018 08:00:03 -0000
@@ -300,6 +300,8 @@ lnewline_at(struct line *lp1, int doto)
if (nlen != 0)
bcopy(&lp1->l_text[doto], &lp2->l_text[0], nlen);
lp1->l_used = doto;
+ /* add new \n here */
+ linsert(1, '\n');
lp2->l_bp = lp1;
lp2->l_fp = lp1->l_fp;
lp1->l_fp = lp2;
Index: paragraph.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/paragraph.c,v
retrieving revision 1.45
diff -u -p -r1.45 paragraph.c
--- paragraph.c 6 Sep 2016 16:25:47 -0000 1.45
+++ paragraph.c 15 Nov 2018 08:00:03 -0000
@@ -338,6 +338,8 @@ transposepara(int f, int n)
if (n == 0)
return (TRUE);
+ undo_boundary_enable(FFRAND, 0);
+
/* find a paragraph, set mark, then goto the end */
gotobop(FFRAND, 1);
curwp->w_markp = curwp->w_dotp;
@@ -364,6 +366,8 @@ transposepara(int f, int n)
return (FALSE);
}
(void)yank(FFRAND, 1);
+
+ undo_boundary_enable(FFRAND, 1);
return (TRUE);
}
Index: region.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/region.c,v
retrieving revision 1.37
diff -u -p -r1.37 region.c
--- region.c 9 Sep 2016 06:05:51 -0000 1.37
+++ region.c 15 Nov 2018 08:00:03 -0000
@@ -90,16 +90,14 @@ copyregion(int f, int n)
loffs = region.r_offset;
while (region.r_size--) {
- if (loffs == llength(linep)) { /* End of line. */
- if ((s = kinsert('\n', KFORW)) != TRUE)
- return (s);
+ if ((s = kinsert(lgetc(linep, loffs), KFORW)) != TRUE) {
+ return (s);
+ } else if (loffs == llength(linep)) { /* End of line. */
linep = lforw(linep);
loffs = 0;
- } else { /* Middle of line. */
- if ((s = kinsert(lgetc(linep, loffs), KFORW)) != TRUE)
- return (s);
+ } else
++loffs;
- }
+
}
clearmark(FFARG, 0);
Index: search.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/search.c,v
retrieving revision 1.47
diff -u -p -r1.47 search.c
--- search.c 11 Jul 2018 12:21:37 -0000 1.47
+++ search.c 15 Nov 2018 08:00:04 -0000
@@ -285,7 +285,7 @@ isearch(int dir)
if (success == FALSE && dir == SRCH_BACK) {
/* wrap the search to end */
curwp->w_dotp = blastlp(curbp);
- curwp->w_doto = llength(curwp->w_dotp);
+ curwp->w_doto = dis_llength(curwp->w_dotp);
curwp->w_dotline = curwp->w_bufp->b_lines;
if (is_find(dir) != FALSE) {
is_cpush(SRCH_MARK);
@@ -328,7 +328,7 @@ isearch(int dir)
if (ISUPPER(CHARMASK(pat[i])))
xcase = 1;
- while (cbo < llength(clp)) {
+ while (cbo < dis_llength(clp)) {
c = lgetc(clp, cbo++);
if ((!firstc && !isalnum(c)))
break;
@@ -697,7 +697,7 @@ forwsrch(void)
if (ISUPPER(CHARMASK(pat[i])))
xcase = 1;
for (;;) {
- if (cbo == llength(clp)) {
+ if (cbo == dis_llength(clp)) {
if ((clp = lforw(clp)) == curbp->b_headp)
break;
nline++;
@@ -710,7 +710,7 @@ forwsrch(void)
tbo = cbo;
pp = &pat[1];
while (*pp != 0) {
- if (tbo == llength(tlp)) {
+ if (tbo == dis_llength(tlp)) {
tlp = lforw(tlp);
if (tlp == curbp->b_headp)
goto fail;
@@ -763,11 +763,11 @@ backsrch(void)
if (clp == curbp->b_headp)
return (FALSE);
nline--;
- cbo = llength(clp) + 1;
+ cbo = dis_llength(clp) + 1;
}
- if (--cbo == llength(clp))
+/* if (--cbo == llength(clp))
c = CCHR('J');
- else
+ else*/
c = lgetc(clp, cbo);
if (eq(c, *epp, xcase) != FALSE) {
tlp = clp;
@@ -780,11 +780,11 @@ backsrch(void)
if (tlp == curbp->b_headp)
goto fail;
nline--;
- tbo = llength(tlp) + 1;
+ tbo = llength(tlp);
}
- if (--tbo == llength(tlp))
+ /*if (--tbo == llength(tlp))
c = CCHR('J');
- else
+ else */
c = lgetc(tlp, tbo);
if (eq(c, *--pp, xcase) == FALSE) {
nline = pline;
Index: tags.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/tags.c,v
retrieving revision 1.16
diff -u -p -r1.16 tags.c
--- tags.c 6 Aug 2017 04:39:45 -0000 1.16
+++ tags.c 15 Nov 2018 08:00:04 -0000
@@ -412,7 +412,7 @@ searchpat(char *s_pat)
dotline = 1;
lp = lforw(curbp->b_headp);
while (lp != curbp->b_headp) {
- if (ltext(lp) != NULL && plen <= llength(lp) &&
+ if (ltext(lp) != NULL && plen <= dis_llength(lp) &&
(strncmp(s_pat, ltext(lp), plen) == 0)) {
curwp->w_doto = 0;
curwp->w_dotp = lp;
Index: undo.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/undo.c,v
retrieving revision 1.58
diff -u -p -r1.58 undo.c
--- undo.c 5 Sep 2016 08:10:58 -0000 1.58
+++ undo.c 15 Nov 2018 08:00:04 -0000
@@ -53,7 +53,7 @@ find_dot(struct line *lp, int off)
return (FALSE);
}
}
- count += llength(p) + 1;
+ count += dis_llength(p);
}
count += off;
@@ -68,8 +68,8 @@ find_lo(int pos, struct line **olp, int
p = curbp->b_headp;
lineno = 0;
- while (pos > llength(p)) {
- pos -= llength(p) + 1;
+ while (pos > dis_llength(p)) {
+ pos -= llength(p);
if ((p = lforw(p)) == curbp->b_headp) {
*olp = NULL;
*offset = 0;
@@ -324,7 +324,7 @@ undo_add_delete(struct line *lp, int off
pos = find_dot(lp, offset);
- if (offset == llength(lp)) /* if it's a newline... */
+ if (offset == dis_llength(lp)) /* if it's a newline... */
undo_add_boundary(FFRAND, 1);
else if ((rec = TAILQ_FIRST(&curbp->b_undo)) != NULL) {
/*
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/util.c,v
retrieving revision 1.38
diff -u -p -r1.38 util.c
--- util.c 18 Nov 2015 18:21:06 -0000 1.38
+++ util.c 15 Nov 2018 08:00:04 -0000
@@ -24,6 +24,7 @@
* position display; it does not truncate just because the screen does.
* This is normally bound to "C-X =".
*/
+/* Good way to check if '\n' is on dot - (012)! */
/* ARGSUSED */
int
showcpos(int f, int n)
@@ -48,18 +49,18 @@ showcpos(int f, int n)
/* mark line */
cline = nline;
cchar = nchar + curwp->w_doto;
- if (curwp->w_doto == llength(clp))
- cbyte = '\n';
- else
+ /* if (curwp->w_doto == llength(clp)) */
+ /* cbyte = '\n' */;
+ /* else */
cbyte = lgetc(clp, curwp->w_doto);
}
/* now count the chars */
- nchar += llength(clp);
+ nchar += llength(clp); /* including '\n' if required */
clp = lforw(clp);
if (clp == curbp->b_headp)
break;
/* count the newline */
- nchar++;
+ /* nchar++; */
}
/* determine row */
row = curwp->w_toprow + 1;
@@ -85,7 +86,10 @@ getcolpos(struct mgwin *wp)
for (i = 0; i < wp->w_doto; ++i) {
c = lgetc(wp->w_dotp, i);
- if (c == '\t'
+ if (c == '\n') {
+ /* col--;*/
+ break;
+ } else if (c == '\t'
#ifdef NOTAB
&& !(wp->w_bufp->b_flag & BFNOTAB)
#endif /* NOTAB */
@@ -130,12 +134,12 @@ twiddle(int f, int n)
return(FALSE);
}
/* Don't twiddle if the dot is on the last char of buffer */
- if (doto == llength(dotp) && lforw(dotp) == curbp->b_headp) {
+ if (doto == dis_llength(dotp) && lforw(dotp) == curbp->b_headp) {
dobeep();
return(FALSE);
}
undo_boundary_enable(FFRAND, 0);
- if (doto == 0 && doto == llength(dotp)) { /* only '\n' on this line */
+ if (doto == 0 && doto == dis_llength(dotp)) { /* only '\n' on this line */
(void)forwline(FFRAND, 1);
curwp->w_doto = 0;
} else {
@@ -224,11 +228,11 @@ deblank(int f, int n)
RSIZE nld;
lp1 = curwp->w_dotp;
- while (llength(lp1) == 0 && (lp2 = lback(lp1)) != curbp->b_headp)
+ while (dis_llength(lp1) == 0 && (lp2 = lback(lp1)) != curbp->b_headp)
lp1 = lp2;
lp2 = lp1;
nld = (RSIZE)0;
- while ((lp2 = lforw(lp2)) != curbp->b_headp && llength(lp2) == 0)
+ while ((lp2 = lforw(lp2)) != curbp->b_headp && dis_llength(lp2) == 0)
++nld;
if (nld == 0)
return (TRUE);
@@ -261,7 +265,7 @@ delwhite(int f, int n)
col = curwp->w_doto;
- while (col < llength(curwp->w_dotp) &&
+ while (col < dis_llength(curwp->w_dotp) &&
(isspace(lgetc(curwp->w_dotp, col))))
++col;
do {
@@ -292,7 +296,7 @@ delleadwhite(int f, int n)
slp = curwp->w_dotp;
soff = curwp->w_doto;
- for (ls = 0; ls < llength(slp); ls++)
+ for (ls = 0; ls < dis_llength(slp); ls++)
if (!isspace(lgetc(slp, ls)))
break;
gotobol(FFRAND, 1);
@@ -476,7 +480,7 @@ int
backtoindent(int f, int n)
{
gotobol(FFRAND, 1);
- while (curwp->w_doto < llength(curwp->w_dotp) &&
+ while (curwp->w_doto < dis_llength(curwp->w_dotp) &&
(isspace(lgetc(curwp->w_dotp, curwp->w_doto))))
++curwp->w_doto;
return (TRUE);
Index: word.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/word.c,v
retrieving revision 1.19
diff -u -p -r1.19 word.c
--- word.c 30 Dec 2015 20:51:51 -0000 1.19
+++ word.c 15 Nov 2018 08:00:04 -0000
@@ -509,6 +509,6 @@ int
inword(void)
{
/* can't use lgetc in ISWORD due to bug in OSK cpp */
- return (curwp->w_doto != llength(curwp->w_dotp) &&
+ return (curwp->w_doto != dis_llength(curwp->w_dotp) &&
ISWORD(curwp->w_dotp->l_text[curwp->w_doto]));
}
Index: yank.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/yank.c,v
retrieving revision 1.14
diff -u -p -r1.14 yank.c
--- yank.c 11 Dec 2015 20:21:23 -0000 1.14
+++ yank.c 15 Nov 2018 08:00:04 -0000
@@ -161,13 +161,13 @@ killline(int f, int n)
kdelete();
thisflag |= CFKILL;
if (!(f & FFARG)) {
- for (i = curwp->w_doto; i < llength(curwp->w_dotp); ++i)
+ for (i = curwp->w_doto; i < dis_llength(curwp->w_dotp); ++i)
if ((c = lgetc(curwp->w_dotp, i)) != ' ' && c != '\t')
break;
- if (i == llength(curwp->w_dotp))
+ if (i == dis_llength(curwp->w_dotp))
chunk = llength(curwp->w_dotp) - curwp->w_doto + 1;
else {
- chunk = llength(curwp->w_dotp) - curwp->w_doto;
+ chunk = dis_llength(curwp->w_dotp) - curwp->w_doto;
if (chunk == 0)
chunk = 1;
}