Loading ...
Sorry, an error occurred while loading the content.

[PATCH] make adding new letters to arabic.c easier

Expand Messages
  • Ali Gholami Rudi
    Hi, This patch replaces switch statements in arabic.c with a static array of struct achars. This makes adding new letters a lot easier and that usually
    Message 1 of 46 , Jun 1, 2009
    • 0 Attachment
      Hi,

      This patch replaces the switch statements in arabic.c with a static array
      of struct achar entries. This makes adding new letters a lot easier:
      usually it only involves adding an entry to this array (I've already
      added the Farsi letters).

      This is also a cleanup patch that removes more than 1k lines. After
      applying this patch, arabic.h holds only a single macro definition, so
      it is probably a good idea to remove arabic.h completely and move the
      trivial ARABIC_CHAR macro to another header.

      Ali
      ---
      src/arabic.c | 1127 ++++++++++------------------------------------------------
      src/arabic.h | 243 +-------------
      src/mbyte.c | 30 --
      3 files changed, 189 insertions(+), 1211 deletions(-)

      diff --git a/src/arabic.c b/src/arabic.c
      --- a/src/arabic.c
      +++ b/src/arabic.c
      @@ -15,917 +15,156 @@
      * --
      *
      * Author: Nadim Shaikli & Isam Bayazidi
      + * Farsi support and restructuring to make adding new
      + * letters easier by Ali Gholami Rudi
      *
      */

      -static int A_is_a __ARGS((int cur_c));
      -static int A_is_s __ARGS((int cur_c));
      -static int A_is_f __ARGS((int cur_c));
      -static int chg_c_a2s __ARGS((int cur_c));
      -static int chg_c_a2i __ARGS((int cur_c));
      -static int chg_c_a2m __ARGS((int cur_c));
      -static int chg_c_a2f __ARGS((int cur_c));
      -static int chg_c_i2m __ARGS((int cur_c));
      -static int chg_c_f2m __ARGS((int cur_c));
      +/*
      + * Sorted list of unicode Arabic characters. Each entry holds the
      + * presentation forms of a letter.
      + *
      + * Arabic characters are categorized into the following types:
      + *
      + * Isolated - iso-8859-6 form
      + * Initial - unicode form-B start
      + * Medial - unicode form-B middle
      + * Final - unicode form-B final
      + * Stand-Alone - unicode form-B isolated
      + *
      + * The fields in the struct represent:
      + *
      + * c -> base code point; s -> stand-alone (form-B isolated)
      + * i -> initial
      + * m -> medial
      + * f -> final
      + *
      + */
      +static struct achar {
      + unsigned c;
      + unsigned s;
      + unsigned i;
      + unsigned m;
      + unsigned f;
      +} achars[] = {
      + {0x0621, 0xfe80}, /* a_HAMZA */
      + {0x0622, 0xfe81, 0, 0, 0xfe82}, /* a_ALEF_MADDA */
      + {0x0623, 0xfe83, 0, 0, 0xfe84}, /* a_ALEF_HAMZA_ABOVE */
      + {0x0624, 0xfe85, 0, 0, 0xfe86}, /* a_WAW_HAMZA */
      + {0x0625, 0xfe87, 0, 0, 0xfe88}, /* a_ALEF_HAMZA_BELOW */
      + {0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a}, /* a_YEH_HAMZA */
      + {0x0627, 0xfe8d, 0, 0, 0xfe8e}, /* a_ALEF */
      + {0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90}, /* a_BEH */
      + {0x0629, 0xfe93, 0, 0, 0xfe94}, /* a_TEH_MARBUTA */
      + {0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96}, /* a_TEH */
      + {0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a}, /* a_THEH */
      + {0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e}, /* a_JEEM */
      + {0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2}, /* a_HAH */
      + {0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6}, /* a_KHAH */
      + {0x062f, 0xfea9, 0, 0, 0xfeaa}, /* a_DAL */
      + {0x0630, 0xfeab, 0, 0, 0xfeac}, /* a_THAL */
      + {0x0631, 0xfead, 0, 0, 0xfeae}, /* a_REH */
      + {0x0632, 0xfeaf, 0, 0, 0xfeb0}, /* a_ZAIN */
      + {0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2}, /* a_SEEN */
      + {0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6}, /* a_SHEEN */
      + {0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba}, /* a_SAD */
      + {0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe}, /* a_DAD */
      + {0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2}, /* a_TAH */
      + {0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6}, /* a_ZAH */
      + {0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca}, /* a_AIN */
      + {0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece}, /* a_GHAIN */
      + {0x0640}, /* a_TATWEEL */
      + {0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2}, /* a_FEH */
      + {0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6}, /* a_QAF */
      + {0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda}, /* a_KAF */
      + {0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede}, /* a_LAM */
      + {0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2}, /* a_MEEM */
      + {0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6}, /* a_NOON */
      + {0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea}, /* a_HEH */
      + {0x0648, 0xfeed, 0, 0, 0xfeee}, /* a_WAW */
      + {0x0649, 0xfeef, 0, 0, 0xfef0}, /* a_ALEF_MAKSURA */
      + {0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2}, /* a_YEH */
      + {0x064b, 0xfe70}, /* a_FATHATAN */
      + {0x064c, 0xfe72}, /* a_DAMMATAN */
      + {0x064d, 0xfe74}, /* a_KASRATAN */
      + {0x064e, 0xfe76, 0, 0xfe77, 0}, /* a_FATHA */
      + {0x064f, 0xfe78, 0, 0xfe79, 0}, /* a_DAMMA */
      + {0x0650, 0xfe7a, 0, 0xfe7b, 0}, /* a_KASRA */
      + {0x0651, 0xfe7c, 0, 0xfe7d, 0}, /* a_SHADDA */
      + {0x0652, 0xfe7e, 0, 0xfe7f, 0}, /* a_SUKUN */
      + {0x0653}, /* a_MADDA_ABOVE */
      + {0x0654}, /* a_HAMZA_ABOVE */
      + {0x0655}, /* a_HAMZA_BELOW */
      + {0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57}, /* a_PEH */
      + {0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b}, /* a_TCHEH */
      + {0x0698, 0xfb8a, 0, 0, 0xfb8b}, /* a_JEH */
      + {0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f}, /* a_FKAF */
      + {0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93}, /* a_GAF */
      + {0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd}, /* a_FYEH */
      +};
      +
      +/* these values are hardcoded in functions */
      +#define a_HAMZA 0x0621
      +#define a_ALEF_MADDA 0x0622
      +#define a_ALEF_HAMZA_ABOVE 0x0623
      +#define a_ALEF_HAMZA_BELOW 0x0625
      +#define a_ALEF 0x0627
      +#define a_LAM 0x0644
      +#define a_FATHATAN 0x064b
      +#define a_s_FATHATAN 0xfe70
      +#define a_s_DAMMATAN 0xfe72
      +#define a_s_KASRATAN 0xfe74
      +#define a_FATHA 0x064e
      +#define a_s_FATHA 0xfe76
      +#define a_s_HAMZA 0xfe80
      +#define a_SUKUN 0x0652
      +#define a_MINI_ALEF 0x0670
      +
      +#define a_s_LAM_ALEF_MADDA_ABOVE 0xfef5
      +#define a_f_LAM_ALEF_MADDA_ABOVE 0xfef6
      +#define a_s_LAM_ALEF_HAMZA_ABOVE 0xfef7
      +#define a_f_LAM_ALEF_HAMZA_ABOVE 0xfef8
      +#define a_s_LAM_ALEF_HAMZA_BELOW 0xfef9
      +#define a_f_LAM_ALEF_HAMZA_BELOW 0xfefa
      +#define a_s_LAM_ALEF 0xfefb
      +#define a_f_LAM_ALEF 0xfefc
      +
      +#define a_BYTE_ORDER_MARK 0xfeff
      +
      +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
      +
      static int chg_c_laa2i __ARGS((int hid_c));
      static int chg_c_laa2f __ARGS((int hid_c));
      -static int half_shape __ARGS((int c));
      static int A_firstc_laa __ARGS((int c1, int c));
      static int A_is_harakat __ARGS((int c));
      static int A_is_iso __ARGS((int c));
      -static int A_is_formb __ARGS((int c));
      static int A_is_ok __ARGS((int c));
      static int A_is_valid __ARGS((int c));
      static int A_is_special __ARGS((int c));

      -
      -/*
      - * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
      - */
      - static int
      -A_is_a(cur_c)
      - int cur_c;
      -{
      - switch (cur_c)
      - {
      - case a_HAMZA:
      - case a_ALEF_MADDA:
      - case a_ALEF_HAMZA_ABOVE:
      - case a_WAW_HAMZA:
      - case a_ALEF_HAMZA_BELOW:
      - case a_YEH_HAMZA:
      - case a_ALEF:
      - case a_BEH:
      - case a_TEH_MARBUTA:
      - case a_TEH:
      - case a_THEH:
      - case a_JEEM:
      - case a_HAH:
      - case a_KHAH:
      - case a_DAL:
      - case a_THAL:
      - case a_REH:
      - case a_ZAIN:
      - case a_SEEN:
      - case a_SHEEN:
      - case a_SAD:
      - case a_DAD:
      - case a_TAH:
      - case a_ZAH:
      - case a_AIN:
      - case a_GHAIN:
      - case a_TATWEEL:
      - case a_FEH:
      - case a_QAF:
      - case a_KAF:
      - case a_LAM:
      - case a_MEEM:
      - case a_NOON:
      - case a_HEH:
      - case a_WAW:
      - case a_ALEF_MAKSURA:
      - case a_YEH:
      - return TRUE;
      - }
      -
      - return FALSE;
      -}
      -
      -
      -/*
      - * Returns True if c is an Isolated Form-B ARABIC letter
      - */
      - static int
      -A_is_s(cur_c)
      - int cur_c;
      -{
      - switch (cur_c)
      - {
      - case a_s_HAMZA:
      - case a_s_ALEF_MADDA:
      - case a_s_ALEF_HAMZA_ABOVE:
      - case a_s_WAW_HAMZA:
      - case a_s_ALEF_HAMZA_BELOW:
      - case a_s_YEH_HAMZA:
      - case a_s_ALEF:
      - case a_s_BEH:
      - case a_s_TEH_MARBUTA:
      - case a_s_TEH:
      - case a_s_THEH:
      - case a_s_JEEM:
      - case a_s_HAH:
      - case a_s_KHAH:
      - case a_s_DAL:
      - case a_s_THAL:
      - case a_s_REH:
      - case a_s_ZAIN:
      - case a_s_SEEN:
      - case a_s_SHEEN:
      - case a_s_SAD:
      - case a_s_DAD:
      - case a_s_TAH:
      - case a_s_ZAH:
      - case a_s_AIN:
      - case a_s_GHAIN:
      - case a_s_FEH:
      - case a_s_QAF:
      - case a_s_KAF:
      - case a_s_LAM:
      - case a_s_MEEM:
      - case a_s_NOON:
      - case a_s_HEH:
      - case a_s_WAW:
      - case a_s_ALEF_MAKSURA:
      - case a_s_YEH:
      - return TRUE;
      - }
      -
      - return FALSE;
      -}
      -
      -
      /*
      - * Returns True if c is a Final shape of an ARABIC letter
      + * Find the achars[] entry for the given Arabic char (NULL if not found)
      */
      - static int
      -A_is_f(cur_c)
      - int cur_c;
      -{
      - switch (cur_c)
      - {
      - case a_f_ALEF_MADDA:
      - case a_f_ALEF_HAMZA_ABOVE:
      - case a_f_WAW_HAMZA:
      - case a_f_ALEF_HAMZA_BELOW:
      - case a_f_YEH_HAMZA:
      - case a_f_ALEF:
      - case a_f_BEH:
      - case a_f_TEH_MARBUTA:
      - case a_f_TEH:
      - case a_f_THEH:
      - case a_f_JEEM:
      - case a_f_HAH:
      - case a_f_KHAH:
      - case a_f_DAL:
      - case a_f_THAL:
      - case a_f_REH:
      - case a_f_ZAIN:
      - case a_f_SEEN:
      - case a_f_SHEEN:
      - case a_f_SAD:
      - case a_f_DAD:
      - case a_f_TAH:
      - case a_f_ZAH:
      - case a_f_AIN:
      - case a_f_GHAIN:
      - case a_f_FEH:
      - case a_f_QAF:
      - case a_f_KAF:
      - case a_f_LAM:
      - case a_f_MEEM:
      - case a_f_NOON:
      - case a_f_HEH:
      - case a_f_WAW:
      - case a_f_ALEF_MAKSURA:
      - case a_f_YEH:
      - case a_f_LAM_ALEF_MADDA_ABOVE:
      - case a_f_LAM_ALEF_HAMZA_ABOVE:
      - case a_f_LAM_ALEF_HAMZA_BELOW:
      - case a_f_LAM_ALEF:
      - return TRUE;
      - }
      - return FALSE;
      -}
      -
      -
      -/*
      - * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
      - */
      - static int
      -chg_c_a2s(cur_c)
      - int cur_c;
      -{
      - int tempc;
      -
      - switch (cur_c)
      - {
      - case a_HAMZA:
      - tempc = a_s_HAMZA;
      - break;
      - case a_ALEF_MADDA:
      - tempc = a_s_ALEF_MADDA;
      - break;
      - case a_ALEF_HAMZA_ABOVE:
      - tempc = a_s_ALEF_HAMZA_ABOVE;
      - break;
      - case a_WAW_HAMZA:
      - tempc = a_s_WAW_HAMZA;
      - break;
      - case a_ALEF_HAMZA_BELOW:
      - tempc = a_s_ALEF_HAMZA_BELOW;
      - break;
      - case a_YEH_HAMZA:
      - tempc = a_s_YEH_HAMZA;
      - break;
      - case a_ALEF:
      - tempc = a_s_ALEF;
      - break;
      - case a_TEH_MARBUTA:
      - tempc = a_s_TEH_MARBUTA;
      - break;
      - case a_DAL:
      - tempc = a_s_DAL;
      - break;
      - case a_THAL:
      - tempc = a_s_THAL;
      - break;
      - case a_REH:
      - tempc = a_s_REH;
      - break;
      - case a_ZAIN:
      - tempc = a_s_ZAIN;
      - break;
      - case a_TATWEEL: /* exceptions */
      - tempc = cur_c;
      - break;
      - case a_WAW:
      - tempc = a_s_WAW;
      - break;
      - case a_ALEF_MAKSURA:
      - tempc = a_s_ALEF_MAKSURA;
      - break;
      - case a_BEH:
      - tempc = a_s_BEH;
      - break;
      - case a_TEH:
      - tempc = a_s_TEH;
      - break;
      - case a_THEH:
      - tempc = a_s_THEH;
      - break;
      - case a_JEEM:
      - tempc = a_s_JEEM;
      - break;
      - case a_HAH:
      - tempc = a_s_HAH;
      - break;
      - case a_KHAH:
      - tempc = a_s_KHAH;
      - break;
      - case a_SEEN:
      - tempc = a_s_SEEN;
      - break;
      - case a_SHEEN:
      - tempc = a_s_SHEEN;
      - break;
      - case a_SAD:
      - tempc = a_s_SAD;
      - break;
      - case a_DAD:
      - tempc = a_s_DAD;
      - break;
      - case a_TAH:
      - tempc = a_s_TAH;
      - break;
      - case a_ZAH:
      - tempc = a_s_ZAH;
      - break;
      - case a_AIN:
      - tempc = a_s_AIN;
      - break;
      - case a_GHAIN:
      - tempc = a_s_GHAIN;
      - break;
      - case a_FEH:
      - tempc = a_s_FEH;
      - break;
      - case a_QAF:
      - tempc = a_s_QAF;
      - break;
      - case a_KAF:
      - tempc = a_s_KAF;
      - break;
      - case a_LAM:
      - tempc = a_s_LAM;
      - break;
      - case a_MEEM:
      - tempc = a_s_MEEM;
      - break;
      - case a_NOON:
      - tempc = a_s_NOON;
      - break;
      - case a_HEH:
      - tempc = a_s_HEH;
      - break;
      - case a_YEH:
      - tempc = a_s_YEH;
      - break;
      - default:
      - tempc = 0;
      - }
      -
      - return tempc;
      -}
      -
      -
      -/*
      - * Change shape - from ISO-8859-6/Isolated to Initial
      - */
      - static int
      -chg_c_a2i(cur_c)
      - int cur_c;
      -{
      - int tempc;
      -
      - switch (cur_c)
      - {
      - case a_YEH_HAMZA:
      - tempc = a_i_YEH_HAMZA;
      - break;
      - case a_HAMZA: /* exceptions */
      - tempc = a_s_HAMZA;
      - break;
      - case a_ALEF_MADDA: /* exceptions */
      - tempc = a_s_ALEF_MADDA;
      - break;
      - case a_ALEF_HAMZA_ABOVE: /* exceptions */
      - tempc = a_s_ALEF_HAMZA_ABOVE;
      - break;
      - case a_WAW_HAMZA: /* exceptions */
      - tempc = a_s_WAW_HAMZA;
      - break;
      - case a_ALEF_HAMZA_BELOW: /* exceptions */
      - tempc = a_s_ALEF_HAMZA_BELOW;
      - break;
      - case a_ALEF: /* exceptions */
      - tempc = a_s_ALEF;
      - break;
      - case a_TEH_MARBUTA: /* exceptions */
      - tempc = a_s_TEH_MARBUTA;
      - break;
      - case a_DAL: /* exceptions */
      - tempc = a_s_DAL;
      - break;
      - case a_THAL: /* exceptions */
      - tempc = a_s_THAL;
      - break;
      - case a_REH: /* exceptions */
      - tempc = a_s_REH;
      - break;
      - case a_ZAIN: /* exceptions */
      - tempc = a_s_ZAIN;
      - break;
      - case a_TATWEEL: /* exceptions */
      - tempc = cur_c;
      - break;
      - case a_WAW: /* exceptions */
      - tempc = a_s_WAW;
      - break;
      - case a_ALEF_MAKSURA: /* exceptions */
      - tempc = a_s_ALEF_MAKSURA;
      - break;
      - case a_BEH:
      - tempc = a_i_BEH;
      - break;
      - case a_TEH:
      - tempc = a_i_TEH;
      - break;
      - case a_THEH:
      - tempc = a_i_THEH;
      - break;
      - case a_JEEM:
      - tempc = a_i_JEEM;
      - break;
      - case a_HAH:
      - tempc = a_i_HAH;
      - break;
      - case a_KHAH:
      - tempc = a_i_KHAH;
      - break;
      - case a_SEEN:
      - tempc = a_i_SEEN;
      - break;
      - case a_SHEEN:
      - tempc = a_i_SHEEN;
      - break;
      - case a_SAD:
      - tempc = a_i_SAD;
      - break;
      - case a_DAD:
      - tempc = a_i_DAD;
      - break;
      - case a_TAH:
      - tempc = a_i_TAH;
      - break;
      - case a_ZAH:
      - tempc = a_i_ZAH;
      - break;
      - case a_AIN:
      - tempc = a_i_AIN;
      - break;
      - case a_GHAIN:
      - tempc = a_i_GHAIN;
      - break;
      - case a_FEH:
      - tempc = a_i_FEH;
      - break;
      - case a_QAF:
      - tempc = a_i_QAF;
      - break;
      - case a_KAF:
      - tempc = a_i_KAF;
      - break;
      - case a_LAM:
      - tempc = a_i_LAM;
      - break;
      - case a_MEEM:
      - tempc = a_i_MEEM;
      - break;
      - case a_NOON:
      - tempc = a_i_NOON;
      - break;
      - case a_HEH:
      - tempc = a_i_HEH;
      - break;
      - case a_YEH:
      - tempc = a_i_YEH;
      - break;
      - default:
      - tempc = 0;
      - }
      -
      - return tempc;
      -}
      -
      -
      -/*
      - * Change shape - from ISO-8859-6/Isolated to Medial
      - */
      - static int
      -chg_c_a2m(cur_c)
      - int cur_c;
      -{
      - int tempc;
      -
      - switch (cur_c)
      - {
      - case a_HAMZA: /* exception */
      - tempc = a_s_HAMZA;
      - break;
      - case a_ALEF_MADDA: /* exception */
      - tempc = a_f_ALEF_MADDA;
      - break;
      - case a_ALEF_HAMZA_ABOVE: /* exception */
      - tempc = a_f_ALEF_HAMZA_ABOVE;
      - break;
      - case a_WAW_HAMZA: /* exception */
      - tempc = a_f_WAW_HAMZA;
      - break;
      - case a_ALEF_HAMZA_BELOW: /* exception */
      - tempc = a_f_ALEF_HAMZA_BELOW;
      - break;
      - case a_YEH_HAMZA:
      - tempc = a_m_YEH_HAMZA;
      - break;
      - case a_ALEF: /* exception */
      - tempc = a_f_ALEF;
      - break;
      - case a_BEH:
      - tempc = a_m_BEH;
      - break;
      - case a_TEH_MARBUTA: /* exception */
      - tempc = a_f_TEH_MARBUTA;
      - break;
      - case a_TEH:
      - tempc = a_m_TEH;
      - break;
      - case a_THEH:
      - tempc = a_m_THEH;
      - break;
      - case a_JEEM:
      - tempc = a_m_JEEM;
      - break;
      - case a_HAH:
      - tempc = a_m_HAH;
      - break;
      - case a_KHAH:
      - tempc = a_m_KHAH;
      - break;
      - case a_DAL: /* exception */
      - tempc = a_f_DAL;
      - break;
      - case a_THAL: /* exception */
      - tempc = a_f_THAL;
      - break;
      - case a_REH: /* exception */
      - tempc = a_f_REH;
      - break;
      - case a_ZAIN: /* exception */
      - tempc = a_f_ZAIN;
      - break;
      - case a_SEEN:
      - tempc = a_m_SEEN;
      - break;
      - case a_SHEEN:
      - tempc = a_m_SHEEN;
      - break;
      - case a_SAD:
      - tempc = a_m_SAD;
      - break;
      - case a_DAD:
      - tempc = a_m_DAD;
      - break;
      - case a_TAH:
      - tempc = a_m_TAH;
      - break;
      - case a_ZAH:
      - tempc = a_m_ZAH;
      - break;
      - case a_AIN:
      - tempc = a_m_AIN;
      - break;
      - case a_GHAIN:
      - tempc = a_m_GHAIN;
      - break;
      - case a_TATWEEL: /* exception */
      - tempc = cur_c;
      - break;
      - case a_FEH:
      - tempc = a_m_FEH;
      - break;
      - case a_QAF:
      - tempc = a_m_QAF;
      - break;
      - case a_KAF:
      - tempc = a_m_KAF;
      - break;
      - case a_LAM:
      - tempc = a_m_LAM;
      - break;
      - case a_MEEM:
      - tempc = a_m_MEEM;
      - break;
      - case a_NOON:
      - tempc = a_m_NOON;
      - break;
      - case a_HEH:
      - tempc = a_m_HEH;
      - break;
      - case a_WAW: /* exception */
      - tempc = a_f_WAW;
      - break;
      - case a_ALEF_MAKSURA: /* exception */
      - tempc = a_f_ALEF_MAKSURA;
      - break;
      - case a_YEH:
      - tempc = a_m_YEH;
      - break;
      - default:
      - tempc = 0;
      - }
      -
      - return tempc;
      -}
      -
      -
      -/*
      - * Change shape - from ISO-8859-6/Isolated to final
      - */
      - static int
      -chg_c_a2f(cur_c)
      - int cur_c;
      -{
      - int tempc;
      -
      - /* NOTE: these encodings need to be accounted for
      -
      - a_f_ALEF_MADDA;
      - a_f_ALEF_HAMZA_ABOVE;
      - a_f_ALEF_HAMZA_BELOW;
      - a_f_LAM_ALEF_MADDA_ABOVE;
      - a_f_LAM_ALEF_HAMZA_ABOVE;
      - a_f_LAM_ALEF_HAMZA_BELOW;
      - */
      -
      - switch (cur_c)
      - {
      - case a_HAMZA: /* exception */
      - tempc = a_s_HAMZA;
      - break;
      - case a_ALEF_MADDA:
      - tempc = a_f_ALEF_MADDA;
      - break;
      - case a_ALEF_HAMZA_ABOVE:
      - tempc = a_f_ALEF_HAMZA_ABOVE;
      - break;
      - case a_WAW_HAMZA:
      - tempc = a_f_WAW_HAMZA;
      - break;
      - case a_ALEF_HAMZA_BELOW:
      - tempc = a_f_ALEF_HAMZA_BELOW;
      - break;
      - case a_YEH_HAMZA:
      - tempc = a_f_YEH_HAMZA;
      - break;
      - case a_ALEF:
      - tempc = a_f_ALEF;
      - break;
      - case a_BEH:
      - tempc = a_f_BEH;
      - break;
      - case a_TEH_MARBUTA:
      - tempc = a_f_TEH_MARBUTA;
      - break;
      - case a_TEH:
      - tempc = a_f_TEH;
      - break;
      - case a_THEH:
      - tempc = a_f_THEH;
      - break;
      - case a_JEEM:
      - tempc = a_f_JEEM;
      - break;
      - case a_HAH:
      - tempc = a_f_HAH;
      - break;
      - case a_KHAH:
      - tempc = a_f_KHAH;
      - break;
      - case a_DAL:
      - tempc = a_f_DAL;
      - break;
      - case a_THAL:
      - tempc = a_f_THAL;
      - break;
      - case a_REH:
      - tempc = a_f_REH;
      - break;
      - case a_ZAIN:
      - tempc = a_f_ZAIN;
      - break;
      - case a_SEEN:
      - tempc = a_f_SEEN;
      - break;
      - case a_SHEEN:
      - tempc = a_f_SHEEN;
      - break;
      - case a_SAD:
      - tempc = a_f_SAD;
      - break;
      - case a_DAD:
      - tempc = a_f_DAD;
      - break;
      - case a_TAH:
      - tempc = a_f_TAH;
      - break;
      - case a_ZAH:
      - tempc = a_f_ZAH;
      - break;
      - case a_AIN:
      - tempc = a_f_AIN;
      - break;
      - case a_GHAIN:
      - tempc = a_f_GHAIN;
      - break;
      - case a_TATWEEL: /* exception */
      - tempc = cur_c;
      - break;
      - case a_FEH:
      - tempc = a_f_FEH;
      - break;
      - case a_QAF:
      - tempc = a_f_QAF;
      - break;
      - case a_KAF:
      - tempc = a_f_KAF;
      - break;
      - case a_LAM:
      - tempc = a_f_LAM;
      - break;
      - case a_MEEM:
      - tempc = a_f_MEEM;
      - break;
      - case a_NOON:
      - tempc = a_f_NOON;
      - break;
      - case a_HEH:
      - tempc = a_f_HEH;
      - break;
      - case a_WAW:
      - tempc = a_f_WAW;
      - break;
      - case a_ALEF_MAKSURA:
      - tempc = a_f_ALEF_MAKSURA;
      - break;
      - case a_YEH:
      - tempc = a_f_YEH;
      - break;
      - default:
      - tempc = 0;
      - }
      -
      - return tempc;
      -}
      -
      -
      -/*
      - * Change shape - from Initial to Medial
      - */
      - static int
      -chg_c_i2m(cur_c)
      - int cur_c;
      -{
      - int tempc;
      -
      - switch (cur_c)
      - {
      - case a_i_YEH_HAMZA:
      - tempc = a_m_YEH_HAMZA;
      - break;
      - case a_i_BEH:
      - tempc = a_m_BEH;
      - break;
      - case a_i_TEH:
      - tempc = a_m_TEH;
      - break;
      - case a_i_THEH:
      - tempc = a_m_THEH;
      - break;
      - case a_i_JEEM:
      - tempc = a_m_JEEM;
      - break;
      - case a_i_HAH:
      - tempc = a_m_HAH;
      - break;
      - case a_i_KHAH:
      - tempc = a_m_KHAH;
      - break;
      - case a_i_SEEN:
      - tempc = a_m_SEEN;
      - break;
      - case a_i_SHEEN:
      - tempc = a_m_SHEEN;
      - break;
      - case a_i_SAD:
      - tempc = a_m_SAD;
      - break;
      - case a_i_DAD:
      - tempc = a_m_DAD;
      - break;
      - case a_i_TAH:
      - tempc = a_m_TAH;
      - break;
      - case a_i_ZAH:
      - tempc = a_m_ZAH;
      - break;
      - case a_i_AIN:
      - tempc = a_m_AIN;
      - break;
      - case a_i_GHAIN:
      - tempc = a_m_GHAIN;
      - break;
      - case a_i_FEH:
      - tempc = a_m_FEH;
      - break;
      - case a_i_QAF:
      - tempc = a_m_QAF;
      - break;
      - case a_i_KAF:
      - tempc = a_m_KAF;
      - break;
      - case a_i_LAM:
      - tempc = a_m_LAM;
      - break;
      - case a_i_MEEM:
      - tempc = a_m_MEEM;
      - break;
      - case a_i_NOON:
      - tempc = a_m_NOON;
      - break;
      - case a_i_HEH:
      - tempc = a_m_HEH;
      - break;
      - case a_i_YEH:
      - tempc = a_m_YEH;
      - break;
      - default:
      - tempc = 0;
      - }
      -
      - return tempc;
      -}
      -
      -
      -/*
      - * Change shape - from Final to Medial
      - */
      - static int
      -chg_c_f2m(cur_c)
      - int cur_c;
      + static struct achar *
      +find_achar(c)
      + int c;
      {
      - int tempc;
      -
      - switch (cur_c)
      - {
      - /* NOTE: these encodings are multi-positional, no ?
      - case a_f_ALEF_MADDA:
      - case a_f_ALEF_HAMZA_ABOVE:
      - case a_f_ALEF_HAMZA_BELOW:
      - */
      - case a_f_YEH_HAMZA:
      - tempc = a_m_YEH_HAMZA;
      - break;
      - case a_f_WAW_HAMZA: /* exceptions */
      - case a_f_ALEF:
      - case a_f_TEH_MARBUTA:
      - case a_f_DAL:
      - case a_f_THAL:
      - case a_f_REH:
      - case a_f_ZAIN:
      - case a_f_WAW:
      - case a_f_ALEF_MAKSURA:
      - tempc = cur_c;
      - break;
      - case a_f_BEH:
      - tempc = a_m_BEH;
      - break;
      - case a_f_TEH:
      - tempc = a_m_TEH;
      - break;
      - case a_f_THEH:
      - tempc = a_m_THEH;
      - break;
      - case a_f_JEEM:
      - tempc = a_m_JEEM;
      - break;
      - case a_f_HAH:
      - tempc = a_m_HAH;
      - break;
      - case a_f_KHAH:
      - tempc = a_m_KHAH;
      - break;
      - case a_f_SEEN:
      - tempc = a_m_SEEN;
      - break;
      - case a_f_SHEEN:
      - tempc = a_m_SHEEN;
      - break;
      - case a_f_SAD:
      - tempc = a_m_SAD;
      - break;
      - case a_f_DAD:
      - tempc = a_m_DAD;
      - break;
      - case a_f_TAH:
      - tempc = a_m_TAH;
      - break;
      - case a_f_ZAH:
      - tempc = a_m_ZAH;
      - break;
      - case a_f_AIN:
      - tempc = a_m_AIN;
      - break;
      - case a_f_GHAIN:
      - tempc = a_m_GHAIN;
      - break;
      - case a_f_FEH:
      - tempc = a_m_FEH;
      - break;
      - case a_f_QAF:
      - tempc = a_m_QAF;
      - break;
      - case a_f_KAF:
      - tempc = a_m_KAF;
      - break;
      - case a_f_LAM:
      - tempc = a_m_LAM;
      - break;
      - case a_f_MEEM:
      - tempc = a_m_MEEM;
      - break;
      - case a_f_NOON:
      - tempc = a_m_NOON;
      - break;
      - case a_f_HEH:
      - tempc = a_m_HEH;
      - break;
      - case a_f_YEH:
      - tempc = a_m_YEH;
      - break;
      - /* NOTE: these encodings are multi-positional, no ?
      - case a_f_LAM_ALEF_MADDA_ABOVE:
      - case a_f_LAM_ALEF_HAMZA_ABOVE:
      - case a_f_LAM_ALEF_HAMZA_BELOW:
      - case a_f_LAM_ALEF:
      - */
      - default:
      - tempc = 0;
      + int h, m, l;
      + h = ARRAY_SIZE(achars);
      + l = 0;
      + /* using binary search to find c */
      + while (l < h) {
      + m = (h + l) / 2;
      + if (achars[m].c == c)
      + return &achars[m];
      + if (c < achars[m].c)
      + h = m;
      + else
      + l = m + 1;
      }
      -
      - return tempc;
      + return NULL;
      }

      -
      /*
      * Change shape - from Combination (2 char) to an Isolated
      */
      @@ -988,17 +227,16 @@ chg_c_laa2f(hid_c)
      }

      /*
      - * Do "half-shaping" on character "c". Return zero if no shaping.
      + * Returns whether it is possible to join the given letters
      */
      static int
      -half_shape(c)
      - int c;
      +can_join(c1, c2)
      + int c1;
      + int c2;
      {
      - if (A_is_a(c))
      - return chg_c_a2i(c);
      - if (A_is_valid(c) && A_is_f(c))
      - return chg_c_f2m(c);
      - return 0;
      + struct achar *a1 = find_achar(c1);
      + struct achar *a2 = find_achar(c2);
      + return a1 && a2 && (a1->i || a1->m) && (a2->f || a2->m);
      }

      /*
      @@ -1019,18 +257,14 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
      int prev_c1;
      int next_c;
      {
      - int curr_c;
      - int shape_c;
      - int curr_laa;
      - int prev_laa;
      + int curr_c;
      + int curr_laa;
      + int prev_laa;

      /* Deal only with Arabic character, pass back all others */
      if (!A_is_ok(c))
      return c;

      - /* half-shape current and previous character */
      - shape_c = half_shape(prev_c);
      -
      /* Save away current character */
      curr_c = c;

      @@ -1039,8 +273,7 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)

      if (curr_laa)
      {
      - if (A_is_valid(prev_c) && !A_is_f(shape_c)
      - && !A_is_s(shape_c) && !prev_laa)
      + if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa)
      curr_c = chg_c_laa2f(curr_laa);
      else
      curr_c = chg_c_laa2i(curr_laa);
      @@ -1048,17 +281,20 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
      /* Remove the composing character */
      *c1p = 0;
      }
      - else if (!A_is_valid(prev_c) && A_is_valid(next_c))
      - curr_c = chg_c_a2i(c);
      - else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
      - curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
      - else if (A_is_valid(next_c))
      - curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
      - else if (A_is_valid(prev_c))
      - curr_c = chg_c_a2f(c);
      else
      - curr_c = chg_c_a2s(c);
      -
      + {
      + struct achar *curr_a = find_achar(c);
      + int backward_combine = !prev_laa && can_join(prev_c, curr_c);
      + int forward_combine = can_join(curr_c, next_c);
      + if (backward_combine && forward_combine)
      + curr_c = curr_a->m;
      + if (backward_combine && !forward_combine)
      + curr_c = curr_a->f;
      + if (!backward_combine && forward_combine)
      + curr_c = curr_a->i;
      + if (!backward_combine && !forward_combine)
      + curr_c = curr_a->s;
      + }
      /* Sanity check -- curr_c should, in the future, never be 0.
      * We should, in the future, insert a fatal error here. */
      if (curr_c == NUL)
      @@ -1112,24 +348,7 @@ A_is_harakat(c)
      A_is_iso(c)
      int c;
      {
      - return ((c >= a_HAMZA && c <= a_GHAIN)
      - || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
      - || c == a_MINI_ALEF);
      -}
      -
      -
      -/*
      - * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
      - * (alphabet/number/punctuation)
      - */
      - static int
      -A_is_formb(c)
      - int c;
      -{
      - return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
      - || c == a_s_KASRATAN
      - || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
      - || c == a_BYTE_ORDER_MARK);
      + return find_achar(c) != NULL;
      }


      @@ -1140,7 +359,7 @@ A_is_formb(c)
      A_is_ok(c)
      int c;
      {
      - return (A_is_iso(c) || A_is_formb(c));
      + return (A_is_iso(c) || c == a_BYTE_ORDER_MARK);
      }


      @@ -1166,3 +385,33 @@ A_is_special(c)
      {
      return (c == a_HAMZA || c == a_s_HAMZA);
      }
      +
      +/*
      + * Check whether we are dealing with Arabic combining characters.
      + * Note: these are NOT really composing characters!
      + */
      + int
      +arabic_combine(one, two)
      + int one; /* first character */
      + int two; /* character just after "one" */
      +{
      + if (one == a_LAM)
      + return arabic_maycombine(two);
      + return FALSE;
      +}
      +
      +/*
      + * Check whether we are dealing with a character that could be regarded as an
      + * Arabic combining character, need to check the character before this.
      + */
      + int
      +arabic_maycombine(two)
      + int two;
      +{
      + if (p_arshape && !p_tbidi)
      + return (two == a_ALEF_MADDA
      + || two == a_ALEF_HAMZA_ABOVE
      + || two == a_ALEF_HAMZA_BELOW
      + || two == a_ALEF);
      + return FALSE;
      +}
      diff --git a/src/arabic.h b/src/arabic.h
      --- a/src/arabic.h
      +++ b/src/arabic.h
      @@ -7,252 +7,11 @@
      */

      /*
      - * Arabic characters are categorized into following types:
      - *
      - * Isolated - iso-8859-6 form char denoted with a_*
      - * Initial - unicode form-B start char denoted with a_i_*
      - * Medial - unicode form-B middle char denoted with a_m_*
      - * Final - unicode form-B final char denoted with a_f_*
      - * Stand-Alone - unicode form-B isolated char denoted with a_s_* (NOT USED)
      - *
      - * --
      *
      * Author: Nadim Shaikli & Isam Bayazidi
      * - (based on Unicode)
      *
      */

      -/*
      - * Arabic ISO-10646-1 character set definition
      - */
      -
      -/*
      - * Arabic ISO-8859-6 (subset of 10646; 0600 - 06FF)
      - */
      -#define a_COMMA 0x060C
      -#define a_SEMICOLON 0x061B
      -#define a_QUESTION 0x061F
      -#define a_HAMZA 0x0621
      -#define a_ALEF_MADDA 0x0622
      -#define a_ALEF_HAMZA_ABOVE 0x0623
      -#define a_WAW_HAMZA 0x0624
      -#define a_ALEF_HAMZA_BELOW 0x0625
      -#define a_YEH_HAMZA 0x0626
      -#define a_ALEF 0x0627
      -#define a_BEH 0x0628
      -#define a_TEH_MARBUTA 0x0629
      -#define a_TEH 0x062a
      -#define a_THEH 0x062b
      -#define a_JEEM 0x062c
      -#define a_HAH 0x062d
      -#define a_KHAH 0x062e
      -#define a_DAL 0x062f
      -#define a_THAL 0x0630
      -#define a_REH 0x0631
      -#define a_ZAIN 0x0632
      -#define a_SEEN 0x0633
      -#define a_SHEEN 0x0634
      -#define a_SAD 0x0635
      -#define a_DAD 0x0636
      -#define a_TAH 0x0637
      -#define a_ZAH 0x0638
      -#define a_AIN 0x0639
      -#define a_GHAIN 0x063a
      -#define a_TATWEEL 0x0640
      -#define a_FEH 0x0641
      -#define a_QAF 0x0642
      -#define a_KAF 0x0643
      -#define a_LAM 0x0644
      -#define a_MEEM 0x0645
      -#define a_NOON 0x0646
      -#define a_HEH 0x0647
      -#define a_WAW 0x0648
      -#define a_ALEF_MAKSURA 0x0649
      -#define a_YEH 0x064a
      -
      -#define a_FATHATAN 0x064b
      -#define a_DAMMATAN 0x064c
      -#define a_KASRATAN 0x064d
      -#define a_FATHA 0x064e
      -#define a_DAMMA 0x064f
      -#define a_KASRA 0x0650
      -#define a_SHADDA 0x0651
      -#define a_SUKUN 0x0652
      -
      -#define a_MADDA_ABOVE 0x0653
      -#define a_HAMZA_ABOVE 0x0654
      -#define a_HAMZA_BELOW 0x0655
      -
      -#define a_ZERO 0x0660
      -#define a_ONE 0x0661
      -#define a_TWO 0x0662
      -#define a_THREE 0x0663
      -#define a_FOUR 0x0664
      -#define a_FIVE 0x0665
      -#define a_SIX 0x0666
      -#define a_SEVEN 0x0667
      -#define a_EIGHT 0x0668
      -#define a_NINE 0x0669
      -#define a_PERCENT 0x066a
      -#define a_DECIMAL 0x066b
      -#define a_THOUSANDS 0x066c
      -#define a_STAR 0x066d
      -#define a_MINI_ALEF 0x0670
      -/* Rest of 8859-6 does not relate to Arabic */
      -
      -/*
      - * Arabic Presentation Form-B (subset of 10646; FE70 - FEFF)
      - *
      - * s -> isolated
      - * i -> initial
      - * m -> medial
      - * f -> final
      - *
      - */
      -#define a_s_FATHATAN 0xfe70
      -#define a_m_TATWEEL_FATHATAN 0xfe71
      -#define a_s_DAMMATAN 0xfe72
      -
      -#define a_s_KASRATAN 0xfe74
      -
      -#define a_s_FATHA 0xfe76
      -#define a_m_FATHA 0xfe77
      -#define a_s_DAMMA 0xfe78
      -#define a_m_DAMMA 0xfe79
      -#define a_s_KASRA 0xfe7a
      -#define a_m_KASRA 0xfe7b
      -#define a_s_SHADDA 0xfe7c
      -#define a_m_SHADDA 0xfe7d
      -#define a_s_SUKUN 0xfe7e
      -#define a_m_SUKUN 0xfe7f
      -
      -#define a_s_HAMZA 0xfe80
      -#define a_s_ALEF_MADDA 0xfe81
      -#define a_f_ALEF_MADDA 0xfe82
      -#define a_s_ALEF_HAMZA_ABOVE 0xfe83
      -#define a_f_ALEF_HAMZA_ABOVE 0xfe84
      -#define a_s_WAW_HAMZA 0xfe85
      -#define a_f_WAW_HAMZA 0xfe86
      -#define a_s_ALEF_HAMZA_BELOW 0xfe87
      -#define a_f_ALEF_HAMZA_BELOW 0xfe88
      -#define a_s_YEH_HAMZA 0xfe89
      -#define a_f_YEH_HAMZA 0xfe8a
      -#define a_i_YEH_HAMZA 0xfe8b
      -#define a_m_YEH_HAMZA 0xfe8c
      -#define a_s_ALEF 0xfe8d
      -#define a_f_ALEF 0xfe8e
      -#define a_s_BEH 0xfe8f
      -#define a_f_BEH 0xfe90
      -#define a_i_BEH 0xfe91
      -#define a_m_BEH 0xfe92
      -#define a_s_TEH_MARBUTA 0xfe93
      -#define a_f_TEH_MARBUTA 0xfe94
      -#define a_s_TEH 0xfe95
      -#define a_f_TEH 0xfe96
      -#define a_i_TEH 0xfe97
      -#define a_m_TEH 0xfe98
      -#define a_s_THEH 0xfe99
      -#define a_f_THEH 0xfe9a
      -#define a_i_THEH 0xfe9b
      -#define a_m_THEH 0xfe9c
      -#define a_s_JEEM 0xfe9d
      -#define a_f_JEEM 0xfe9e
      -#define a_i_JEEM 0xfe9f
      -#define a_m_JEEM 0xfea0
      -#define a_s_HAH 0xfea1
      -#define a_f_HAH 0xfea2
      -#define a_i_HAH 0xfea3
      -#define a_m_HAH 0xfea4
      -#define a_s_KHAH 0xfea5
      -#define a_f_KHAH 0xfea6
      -#define a_i_KHAH 0xfea7
      -#define a_m_KHAH 0xfea8
      -#define a_s_DAL 0xfea9
      -#define a_f_DAL 0xfeaa
      -#define a_s_THAL 0xfeab
      -#define a_f_THAL 0xfeac
      -#define a_s_REH 0xfead
      -#define a_f_REH 0xfeae
      -#define a_s_ZAIN 0xfeaf
      -#define a_f_ZAIN 0xfeb0
      -#define a_s_SEEN 0xfeb1
      -#define a_f_SEEN 0xfeb2
      -#define a_i_SEEN 0xfeb3
      -#define a_m_SEEN 0xfeb4
      -#define a_s_SHEEN 0xfeb5
      -#define a_f_SHEEN 0xfeb6
      -#define a_i_SHEEN 0xfeb7
      -#define a_m_SHEEN 0xfeb8
      -#define a_s_SAD 0xfeb9
      -#define a_f_SAD 0xfeba
      -#define a_i_SAD 0xfebb
      -#define a_m_SAD 0xfebc
      -#define a_s_DAD 0xfebd
      -#define a_f_DAD 0xfebe
      -#define a_i_DAD 0xfebf
      -#define a_m_DAD 0xfec0
      -#define a_s_TAH 0xfec1
      -#define a_f_TAH 0xfec2
      -#define a_i_TAH 0xfec3
      -#define a_m_TAH 0xfec4
      -#define a_s_ZAH 0xfec5
      -#define a_f_ZAH 0xfec6
      -#define a_i_ZAH 0xfec7
      -#define a_m_ZAH 0xfec8
      -#define a_s_AIN 0xfec9
      -#define a_f_AIN 0xfeca
      -#define a_i_AIN 0xfecb
      -#define a_m_AIN 0xfecc
      -#define a_s_GHAIN 0xfecd
      -#define a_f_GHAIN 0xfece
      -#define a_i_GHAIN 0xfecf
      -#define a_m_GHAIN 0xfed0
      -#define a_s_FEH 0xfed1
      -#define a_f_FEH 0xfed2
      -#define a_i_FEH 0xfed3
      -#define a_m_FEH 0xfed4
      -#define a_s_QAF 0xfed5
      -#define a_f_QAF 0xfed6
      -#define a_i_QAF 0xfed7
      -#define a_m_QAF 0xfed8
      -#define a_s_KAF 0xfed9
      -#define a_f_KAF 0xfeda
      -#define a_i_KAF 0xfedb
      -#define a_m_KAF 0xfedc
      -#define a_s_LAM 0xfedd
      -#define a_f_LAM 0xfede
      -#define a_i_LAM 0xfedf
      -#define a_m_LAM 0xfee0
      -#define a_s_MEEM 0xfee1
      -#define a_f_MEEM 0xfee2
      -#define a_i_MEEM 0xfee3
      -#define a_m_MEEM 0xfee4
      -#define a_s_NOON 0xfee5
      -#define a_f_NOON 0xfee6
      -#define a_i_NOON 0xfee7
      -#define a_m_NOON 0xfee8
      -#define a_s_HEH 0xfee9
      -#define a_f_HEH 0xfeea
      -#define a_i_HEH 0xfeeb
      -#define a_m_HEH 0xfeec
      -#define a_s_WAW 0xfeed
      -#define a_f_WAW 0xfeee
      -#define a_s_ALEF_MAKSURA 0xfeef
      -#define a_f_ALEF_MAKSURA 0xfef0
      -#define a_s_YEH 0xfef1
      -#define a_f_YEH 0xfef2
      -#define a_i_YEH 0xfef3
      -#define a_m_YEH 0xfef4
      -#define a_s_LAM_ALEF_MADDA_ABOVE 0xfef5
      -#define a_f_LAM_ALEF_MADDA_ABOVE 0xfef6
      -#define a_s_LAM_ALEF_HAMZA_ABOVE 0xfef7
      -#define a_f_LAM_ALEF_HAMZA_ABOVE 0xfef8
      -#define a_s_LAM_ALEF_HAMZA_BELOW 0xfef9
      -#define a_f_LAM_ALEF_HAMZA_BELOW 0xfefa
      -#define a_s_LAM_ALEF 0xfefb
      -#define a_f_LAM_ALEF 0xfefc
      -
      -#define a_BYTE_ORDER_MARK 0xfeff
      -
      /* Range of Arabic characters that might be shaped. */
      -#define ARABIC_CHAR(c) ((c) >= a_HAMZA && (c) <= a_MINI_ALEF)
      +#define ARABIC_CHAR(c) (((c) & 0xFF00) == 0x0600)
      diff --git a/src/mbyte.c b/src/mbyte.c
      --- a/src/mbyte.c
      +++ b/src/mbyte.c
      @@ -1450,36 +1450,6 @@ mb_cptr2char_adv(pp)

      #if defined(FEAT_ARABIC) || defined(PROTO)
      /*
      - * Check whether we are dealing with Arabic combining characters.
      - * Note: these are NOT really composing characters!
      - */
      - int
      -arabic_combine(one, two)
      - int one; /* first character */
      - int two; /* character just after "one" */
      -{
      - if (one == a_LAM)
      - return arabic_maycombine(two);
      - return FALSE;
      -}
      -
      -/*
      - * Check whether we are dealing with a character that could be regarded as an
      - * Arabic combining character, need to check the character before this.
      - */
      - int
      -arabic_maycombine(two)
      - int two;
      -{
      - if (p_arshape && !p_tbidi)
      - return (two == a_ALEF_MADDA
      - || two == a_ALEF_HAMZA_ABOVE
      - || two == a_ALEF_HAMZA_BELOW
      - || two == a_ALEF);
      - return FALSE;
      -}
      -
      -/*
      * Check if the character pointed to by "p2" is a composing character when it
      * comes after "p1". For Arabic sometimes "ab" is replaced with "c", which
      * behaves like a composing character.

      --~--~---------~--~----~------------~-------~--~----~
      You received this message from the "vim_dev" maillist.
      For more information, visit http://www.vim.org/maillist.php
      -~----------~----~----~----~------~----~------~--~---
    • Ameretat Reith
      On Wed, 08 Oct 2014 08:11:34 +0330 ... For ZWNJ, I did a tiny modification telling `arabic_shape` in one of its invocations, to separate letters based on
      Message 46 of 46 , Oct 12, 2014
      • 0 Attachment
        On Wed, 08 Oct 2014 08:11:34 +0330
        Ali Gholami Rudi <ali.gholami.rudi@...> wrote:

        > I tested it and it is working wonderfully. The only issue is
        > ZWNJ (unicode 0x200c) and ZWJ (unicode 0x200d); I wonder if
        > the letters before and after these characters can be shaped
        > properly...

        For ZWNJ, I made a tiny modification that tells `arabic_shape`, in one
        of its invocations, to separate letters based on a previously entered
        ZWNJ character. ZWJ could be treated similarly, but I doubt that, with
        the currently provided letters (which are limited to Arabic and Farsi),
        there is a use case for ZWJ.

        You can apply `zwnj.0.patch` on a recent vim tarball or `zwnj.1.patch`
        on top of your patch.

        --
        --
        You received this message from the "vim_dev" maillist.
        Do not top-post! Type your reply below the text you are replying to.
        For more information, visit http://www.vim.org/maillist.php

        ---
        You received this message because you are subscribed to the Google Groups "vim_dev" group.
        To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+unsubscribe@....
        For more options, visit https://groups.google.com/d/optout.
      Your message has been successfully submitted and will be delivered to recipients shortly.