Loading ...
Sorry, an error occurred while loading the content.
 

Patch 7.2.312

Expand Messages
  • Bram Moolenaar
    Patch 7.2.312 Problem: iconv() returns an invalid character sequence when conversion fails. It should return an empty string. (Yongwei Wu) Solution: Be more strict about invalid characters in the input.
    Message 1 of 1 , Dec 2, 2009
      Patch 7.2.312
      Problem: iconv() returns an invalid character sequence when conversion
      fails. It should return an empty string. (Yongwei Wu)
      Solution: Be more strict about invalid characters in the input.
      Files: src/mbyte.c


      *** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200
      --- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100
      ***************
      *** 133,154 ****
      static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
      static int dbcs_ptr2char __ARGS((char_u *p));

      ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
      ! * the first byte of a UTF-8 string. Bytes which are illegal when used as the
      ! * first byte have a one, because these will be used separately. */
      static char utf8len_tab[256] =
      {
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
      ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
      3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
      };

      /*
      * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
      * in the "xim.log" file.
      */
      --- 133,172 ----
      static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
      static int dbcs_ptr2char __ARGS((char_u *p));

      ! /*
      ! * Lookup table to quickly get the length in bytes of a UTF-8 character from
      ! * the first byte of a UTF-8 string.
      ! * Bytes which are illegal when used as the first byte have a 1.
      ! * The NUL byte has length 1.
      ! */
      static char utf8len_tab[256] =
      {
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
      3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
      };

      /*
      + * Like utf8len_tab above, but using a zero for illegal lead bytes.
      + */
      + static char utf8len_tab_zero[256] =
      + {
      + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
      + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
      + };
      +
      + /*
      * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
      * in the "xim.log" file.
      */
      ***************
      *** 1352,1358 ****
      if (size > 0 && *p >= 0x80)
      {
      if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
      ! return 1;
      c = utf_ptr2char(p);
      /* An illegal byte is displayed as <xx>. */
      if (utf_ptr2len(p) == 1 || c == NUL)
      --- 1370,1376 ----
      if (size > 0 && *p >= 0x80)
      {
      if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
      ! return 1; /* truncated */
      c = utf_ptr2char(p);
      /* An illegal byte is displayed as <xx>. */
      if (utf_ptr2len(p) == 1 || c == NUL)
      ***************
      *** 1473,1479 ****
      if (p[0] < 0x80) /* be quick for ASCII */
      return p[0];

      ! len = utf8len_tab[p[0]];
      if (len > 1 && (p[1] & 0xc0) == 0x80)
      {
      if (len == 2)
      --- 1491,1497 ----
      if (p[0] < 0x80) /* be quick for ASCII */
      return p[0];

      ! len = utf8len_tab_zero[p[0]];
      if (len > 1 && (p[1] & 0xc0) == 0x80)
      {
      if (len == 2)
      ***************
      *** 1723,1728 ****
      --- 1741,1747 ----
      /*
      * Return length of UTF-8 character, obtained from the first byte.
      * "b" must be between 0 and 255!
      + * Returns 1 for an invalid first byte value.
      */
      int
      utf_byte2len(b)
      ***************
      *** 1737,1742 ****
      --- 1756,1762 ----
      * Returns 1 for "".
      * Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
      * Returns number > "size" for an incomplete byte sequence.
      + * Never returns zero.
      */
      int
      utf_ptr2len_len(p, size)
      ***************
      *** 1747,1757 ****
      int i;
      int m;

      ! if (*p == NUL)
      ! return 1;
      ! m = len = utf8len_tab[*p];
      if (len > size)
      m = size; /* incomplete byte sequence. */
      for (i = 1; i < m; ++i)
      if ((p[i] & 0xc0) != 0x80)
      return 1;
      --- 1767,1779 ----
      int i;
      int m;

      ! len = utf8len_tab[*p];
      ! if (len == 1)
      ! return 1; /* NUL, ascii or illegal lead byte */
      if (len > size)
      m = size; /* incomplete byte sequence. */
      + else
      + m = len;
      for (i = 1; i < m; ++i)
      if ((p[i] & 0xc0) != 0x80)
      return 1;
      ***************
      *** 2505,2510 ****
      --- 2527,2533 ----
      /*
      * mb_head_off() function pointer.
      * Return offset from "p" to the first byte of the character it points into.
      + * If "p" points to the NUL at the end of the string return 0.
      * Returns 0 when already at the first byte of a character.
      */
      int
      ***************
      *** 2524,2530 ****

      /* It can't be a trailing byte when not using DBCS, at the start of the
      * string or the previous byte can't start a double-byte. */
      ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
      return 0;

      /* This is slow: need to start at the base and go forward until the
      --- 2547,2553 ----

      /* It can't be a trailing byte when not using DBCS, at the start of the
      * string or the previous byte can't start a double-byte. */
      ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
      return 0;

      /* This is slow: need to start at the base and go forward until the
      ***************
      *** 2552,2558 ****
      * lead byte in the current cell. */
      if (p <= base
      || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
      ! || MB_BYTE2LEN(p[-1]) == 1)
      return 0;

      /* This is slow: need to start at the base and go forward until the
      --- 2575,2582 ----
      * lead byte in the current cell. */
      if (p <= base
      || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
      ! || MB_BYTE2LEN(p[-1]) == 1
      ! || *p == NUL)
      return 0;

      /* This is slow: need to start at the base and go forward until the
      ***************
      *** 2578,2583 ****
      --- 2602,2608 ----
      char_u *q;
      char_u *s;
      int c;
      + int len;
      #ifdef FEAT_ARABIC
      char_u *j;
      #endif
      ***************
      *** 2597,2604 ****
      --q;
      /* Check for illegal sequence. Do allow an illegal byte after where we
      * started. */
      ! if (utf8len_tab[*q] != (int)(s - q + 1)
      ! && utf8len_tab[*q] != (int)(p - q + 1))
      return 0;

      if (q <= base)
      --- 2622,2629 ----
      --q;
      /* Check for illegal sequence. Do allow an illegal byte after where we
      * started. */
      ! len = utf8len_tab[*q];
      ! if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
      return 0;

      if (q <= base)
      ***************
      *** 2810,2818 ****

      while (end == NULL ? *p != NUL : p < end)
      {
      ! if ((*p & 0xc0) == 0x80)
      return FALSE; /* invalid lead byte */
      - l = utf8len_tab[*p];
      if (end != NULL && p + l > end)
      return FALSE; /* incomplete byte sequence */
      ++p;
      --- 2835,2843 ----

      while (end == NULL ? *p != NUL : p < end)
      {
      ! l = utf8len_tab_zero[*p];
      ! if (l == 0)
      return FALSE; /* invalid lead byte */
      if (end != NULL && p + l > end)
      return FALSE; /* incomplete byte sequence */
      ++p;
      ***************
      *** 6117,6128 ****
      d = retval;
      for (i = 0; i < len; ++i)
      {
      ! l = utf_ptr2len(ptr + i);
      if (l == 0)
      *d++ = NUL;
      else if (l == 1)
      {
      ! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
      {
      /* Incomplete sequence at the end. */
      *unconvlenp = len - i;
      --- 6142,6161 ----
      d = retval;
      for (i = 0; i < len; ++i)
      {
      ! l = utf_ptr2len_len(ptr + i, len - i);
      if (l == 0)
      *d++ = NUL;
      else if (l == 1)
      {
      ! int l_w = utf8len_tab_zero[ptr[i]];
      !
      ! if (l_w == 0)
      ! {
      ! /* Illegal utf-8 byte cannot be converted */
      ! vim_free(retval);
      ! return NULL;
      ! }
      ! if (unconvlenp != NULL && l_w > len - i)
      {
      /* Incomplete sequence at the end. */
      *unconvlenp = len - i;
      *** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100
      --- src/version.c 2009-12-02 15:00:23.000000000 +0100
      ***************
      *** 683,684 ****
      --- 683,686 ----
      { /* Add new patch number below this line */
      + /**/
      + 312,
      /**/

      --
      hundred-and-one symptoms of being an internet addict:
      6. You refuse to go to a vacation spot with no electricity and no phone lines.

      /// Bram Moolenaar -- Bram@... -- http://www.Moolenaar.net \\\
      /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
      \\\ download, build and distribute -- http://www.A-A-P.org ///
      \\\ help me help AIDS victims -- http://ICCF-Holland.org ///

      --
      You received this message from the "vim_dev" maillist.
      For more information, visit http://www.vim.org/maillist.php
    Your message has been successfully submitted and will be delivered to recipients shortly.