The attached patch fixes the bug I reported earlier today that litters
Traditional Inline PGP 2.x messages with U+FFFD (the replacement character
substituted for an invalid UTF-8 sequence) where Latin-1 national characters had been.
It works like this:
Rather than assuming the PGP output file is in UTF-8 format,
1. check if it really is valid UTF-8
2a. if it is, behavior is unchanged
2b. if it is NOT UTF-8, assume PGP output is in body_charset,
as suggested by Tamotsu Takahashi.
TEST STATUS: works for me.
--
Matthias Andree
Index: Makefile.am
===================================================================
RCS file: /home/roessler/cvs/mutt/Makefile.am,v
retrieving revision 3.29
diff -u -r3.29 Makefile.am
--- Makefile.am 4 Feb 2005 16:54:13 -0000 3.29
+++ Makefile.am 10 Feb 2005 17:37:52 -0000
@@ -64,7 +64,8 @@
browser.h mbyte.h remailer.h url.h mutt_ssl_nss.c \
crypt-mod-pgp-classic.c crypt-mod-smime-classic.c \
pgppacket.c mutt_idna.h hcache.c mutt_ssl_gnutls.c \
- crypt-gpgme.c crypt-mod-pgp-gpgme.c crypt-mod-smime-gpgme.c
+ crypt-gpgme.c crypt-mod-pgp-gpgme.c crypt-mod-smime-gpgme.c \
+ utf8chk.h utf8chk.c
EXTRA_DIST = COPYRIGHT GPL OPS OPS.PGP OPS.CRYPT OPS.SMIME TODO \
configure acconfig.h account.h \
Index: configure.in
===================================================================
RCS file: /home/roessler/cvs/mutt/configure.in,v
retrieving revision 3.21
diff -u -r3.21 configure.in
--- configure.in 31 Jan 2005 02:40:14 -0000 3.21
+++ configure.in 10 Feb 2005 17:37:53 -0000
@@ -130,7 +130,7 @@
AC_DEFINE(CRYPT_BACKEND_CLASSIC_PGP,1,
[ Define if you want classic PGP support. ])
PGPAUX_TARGET="pgpring pgpewrap"
- MUTT_LIB_OBJECTS="$MUTT_LIB_OBJECTS pgp.o pgpinvoke.o pgpkey.o pgplib.o gnupgparse.o pgpmicalg.o pgppacket.o crypt-mod-pgp-classic.o"
+ MUTT_LIB_OBJECTS="$MUTT_LIB_OBJECTS pgp.o utf8chk.o pgpinvoke.o pgpkey.o pgplib.o gnupgparse.o pgpmicalg.o pgppacket.o crypt-mod-pgp-classic.o"
fi
AC_ARG_ENABLE(smime, [ --disable-smime Disable SMIME support],
Index: pgp.c
===================================================================
RCS file: /home/roessler/cvs/mutt/pgp.c,v
retrieving revision 3.39
diff -u -r3.39 pgp.c
--- pgp.c 3 Feb 2005 18:44:27 -0000 3.39
+++ pgp.c 10 Feb 2005 17:37:53 -0000
@@ -2,6 +2,7 @@
* Copyright (C) 1996,1997 Michael R. Elkins <me@xxxxxxxx>
* Copyright (C) 1998,1999 Thomas Roessler <roessler@xxxxxxxxxxxxxxxxxx>
* Copyright (C) 2004 g10 Code GmbH
+ * Copyright (C) 2005 Matthias Andree <matthias.andree@xxxxxx>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +36,7 @@
#include "pgp.h"
#include "mime.h"
#include "copy.h"
+#include "utf8chk.h"
#include <sys/wait.h>
#include <string.h>
@@ -407,10 +409,29 @@
else if (pgpout)
{
FGETCONV *fc;
- int c;
- rewind (pgpout);
+ int c, valid_utf8 = 1;
+
+ {
+ struct is_valid_utf8_state u8s;
+
+ /* check if file is UTF-8, if it isn't, assume body character set */
+ is_valid_utf8_init(&u8s);
+ rewind (pgpout);
+ do {
+ c = fgetc(pgpout);
+ if (!is_valid_utf8(&u8s, c)) {
+ valid_utf8 = 0;
+ break;
+ }
+ } while (c != EOF);
+ }
+
+ dprint (1, (debugfile, "pgp.c:%d: pgpout is %s UTF-8, body_charset=\"%s\"\n", __LINE__, valid_utf8 ? "conformant" : "not", body_charset));
+
+ /* now decode */
+ rewind(pgpout);
state_set_prefix (s);
- fc = fgetconv_open (pgpout, "utf-8", Charset, 0);
+ fc = fgetconv_open (pgpout, valid_utf8 ? "utf-8" : body_charset, Charset, 0);
while ((c = fgetconv (fc)) != EOF)
state_prefix_putc (c, s);
fgetconv_close (&fc);
--- /dev/null 2004-10-02 10:38:03.000000000 +0200
+++ utf8chk.h 2005-02-10 18:11:28.000000000 +0100
@@ -0,0 +1,31 @@
+/* utf8chk.h -- fast ANSI-C UTF-8 validator
+ * Copyright (C) 2005 Matthias Andree <matthias.andree@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef UTF8CHK_H
+#define UTF8CHK_H 1
+
+struct is_valid_utf8_state {
+ unsigned long out; /* code point value accumulated from the current sequence */
+ int count; /* continuation bytes announced by the current lead byte */
+ int icount; /* continuation bytes still expected; 0 = between sequences */
+};
+
+extern void is_valid_utf8_init(struct is_valid_utf8_state *s);
+extern int is_valid_utf8(struct is_valid_utf8_state *s, int in);
+
+#endif
--- /dev/null 2004-10-02 10:38:03.000000000 +0200
+++ utf8chk.c 2005-02-10 18:11:47.000000000 +0100
@@ -0,0 +1,99 @@
+/* utf8chk.c -- fast ANSI-C UTF-8 validator
+ * Copyright (C) 2005 Matthias Andree <matthias.andree@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdio.h>
+#include "utf8chk.h"
+
+void is_valid_utf8_init(struct is_valid_utf8_state *s) {
+    s->icount = s->count = 0; /* no multi-byte sequence is pending */
+    s->out = 0;               /* nothing accumulated yet */
+}
+
+/* Feed one fgetc() result to the validator.  Returns 1 while the input is
+ * still valid UTF-8 and 0 at the first byte that makes it invalid; for EOF,
+ * returns 1 only if no multi-byte sequence is left unfinished. */
+int is_valid_utf8(struct is_valid_utf8_state *s, int in) {
+    unsigned char c = (unsigned char)in;
+
+    if (in == EOF)
+        return s->icount == 0;
+    if (s->icount == 0) {
+        if (c < 0x80)
+            return 1;
+        if (c >= 0xfe)
+            return 0;
+        /* forget the previous sequence's length, or a stray 0x80..0xc1
+         * here would be accepted as a lead byte of that length */
+        s->count = 0;
+        if (c >= 0xfc)
+            s->count = 5, s->out = c & 0x1;
+        else if (c >= 0xf8)
+            s->count = 4, s->out = c & 0x3;
+        else if (c >= 0xf0)
+            s->count = 3, s->out = c & 0x7;
+        else if (c >= 0xe0)
+            s->count = 2, s->out = c & 0x0f;
+        /* c2 rather than c0 catches overlong sequences right away */
+        else if (c >= 0xc2)
+            s->count = 1, s->out = c & 0x1f;
+        s->icount = s->count;
+        return s->count != 0;
+    } else {
+        s->out <<= 6;
+        if (c < 0x80 || c >= 0xc0)
+            return 0;
+    }
+    s->out |= (c & 0x3f);
+    if (-- s->icount)
+        return 1;
+    /* sequence complete: reject U+FFFE, U+FFFF and the surrogate range */
+    if (s->out == 0xfffe || s->out == 0xffff
+        || (s->out >= 0xd800 && s->out < 0xe000))
+        return 0;
+    /* overlong check; for 2-byte sequences the bound is only 0x40, but
+     * 0x40..0x7f would need a 0xc0/0xc1 lead byte, rejected above */
+    if (s->out < (1u << (5 * s->count + 1)))
+        return 0;
+    return 1;
+}
+
+#ifdef TEST
+#include <stdlib.h>
+/* Self-test driver: validate stdin, report the offset of the first bad byte. */
+int main(int argc, char **argv) {
+    struct is_valid_utf8_state state;
+    unsigned long offset;
+    int ch;
+
+    is_valid_utf8_init(&state);
+    /* EOF is passed to the validator before the loop ends, so that a
+     * sequence cut short by the end of input is reported as bad */
+    for (offset = 0; ; offset++) {
+        ch = fgetc(stdin);
+        if (is_valid_utf8(&state, ch) == 0) {
+            printf("BAD character at position %lu\n", offset);
+            exit(EXIT_FAILURE);
+        }
+        if (ch == EOF)
+            break;
+    }
+
+    printf("OK\n");
+    return 0;
+}
+#endif
Attachment:
pgpgb01OYljNi.pgp
Description: PGP signature