ArabicSupport.cs 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972
  1. #region File Description
  2. //-----------------------------------------------------------------------------
  3. /// <summary>
  4. /// This is an Open Source File Created by: Abdullah Konash (http://abdullahkonash.com/) Twitter: @konash
  5. /// This file allows users to display Arabic text on the XNA and Unity platforms.
  6. /// It reverses the character order and replaces each letter with the appropriate contextual form so that the letters connect correctly.
  7. /// </summary>
  8. //-----------------------------------------------------------------------------
  9. #endregion
  10. #region Using Statements
  11. using System;
  12. using System.Collections.Generic;
  13. #endregion
  14. namespace ArabicSupport
  15. {
  16. public class ArabicFixer
  17. {
  18. /// <summary>
  19. /// Fix the specified string.
  20. /// </summary>
  21. /// <param name='str'>
  22. /// String to be fixed.
  23. /// </param>
  24. public static string Fix(string str)
  25. {
  26. return Fix(str, false, true);
  27. }
  28. public static string Fix(string str, bool rtl)
  29. {
  30. if(rtl)
  31. {
  32. return Fix(str);
  33. }
  34. else
  35. {
  36. string[] words = str.Split(' ');
  37. string result = "";
  38. string arabicToIgnore = "";
  39. foreach(string word in words)
  40. {
  41. if(char.IsLower(word.ToLower()[word.Length/2]))
  42. {
  43. result += Fix(arabicToIgnore) + word + " ";
  44. arabicToIgnore = "";
  45. }
  46. else
  47. {
  48. arabicToIgnore += word + " ";
  49. }
  50. }
  51. if(arabicToIgnore != "")
  52. result += Fix(arabicToIgnore);
  53. return result;
  54. }
  55. }
  56. /// <summary>
  57. /// Fix the specified string with customization options.
  58. /// </summary>
  59. /// <param name='str'>
  60. /// String to be fixed.
  61. /// </param>
  62. /// <param name='showTashkeel'>
  63. /// Show tashkeel.
  64. /// </param>
  65. /// <param name='useHinduNumbers'>
  66. /// Use hindu numbers.
  67. /// </param>
  68. public static string Fix(string str, bool showTashkeel, bool useHinduNumbers)
  69. {
  70. ArabicFixerTool.showTashkeel = showTashkeel;
  71. ArabicFixerTool.useHinduNumbers =useHinduNumbers;
  72. if(str.Contains("\n"))
  73. str = str.Replace("\n", Environment.NewLine);
  74. if(str.Contains(Environment.NewLine))
  75. {
  76. string[] stringSeparators = new string[] {Environment.NewLine};
  77. string[] strSplit = str.Split(stringSeparators, StringSplitOptions.None);
  78. if(strSplit.Length == 0)
  79. return ArabicFixerTool.FixLine(str);
  80. else if(strSplit.Length == 1)
  81. return ArabicFixerTool.FixLine(str);
  82. else
  83. {
  84. string outputString = ArabicFixerTool.FixLine(strSplit[0]);
  85. int iteration = 1;
  86. if(strSplit.Length > 1)
  87. {
  88. while(iteration < strSplit.Length)
  89. {
  90. outputString += Environment.NewLine + ArabicFixerTool.FixLine(strSplit[iteration]);
  91. iteration++;
  92. }
  93. }
  94. return outputString;
  95. }
  96. }
  97. else
  98. {
  99. return ArabicFixerTool.FixLine(str);
  100. }
  101. }
  102. }
  103. }
  104. /// <summary>
  105. /// Arabic Contextual forms General - Unicode
  106. /// </summary>
  107. internal enum IsolatedArabicLetters
  108. {
  109. Hamza = 0xFE80,
  110. Alef = 0xFE8D,
  111. AlefHamza = 0xFE83,
  112. WawHamza = 0xFE85,
  113. AlefMaksoor = 0xFE87,
  114. AlefMaksora = 0xFBFC,
  115. HamzaNabera = 0xFE89,
  116. Ba = 0xFE8F,
  117. Ta = 0xFE95,
  118. Tha2 = 0xFE99,
  119. Jeem = 0xFE9D,
  120. H7aa = 0xFEA1,
  121. Khaa2 = 0xFEA5,
  122. Dal = 0xFEA9,
  123. Thal = 0xFEAB,
  124. Ra2 = 0xFEAD,
  125. Zeen = 0xFEAF,
  126. Seen = 0xFEB1,
  127. Sheen = 0xFEB5,
  128. S9a = 0xFEB9,
  129. Dha = 0xFEBD,
  130. T6a = 0xFEC1,
  131. T6ha = 0xFEC5,
  132. Ain = 0xFEC9,
  133. Gain = 0xFECD,
  134. Fa = 0xFED1,
  135. Gaf = 0xFED5,
  136. Kaf = 0xFED9,
  137. Lam = 0xFEDD,
  138. Meem = 0xFEE1,
  139. Noon = 0xFEE5,
  140. Ha = 0xFEE9,
  141. Waw = 0xFEED,
  142. Ya = 0xFEF1,
  143. AlefMad = 0xFE81,
  144. TaMarboota = 0xFE93,
  145. PersianPe = 0xFB56, // Persian Letters;
  146. PersianChe = 0xFB7A,
  147. PersianZe = 0xFB8A,
  148. PersianGaf = 0xFB92,
  149. PersianGaf2 = 0xFB8E
  150. }
  151. /// <summary>
  152. /// Arabic Contextual forms - Isolated
  153. /// </summary>
  154. internal enum GeneralArabicLetters
  155. {
  156. Hamza = 0x0621,
  157. Alef = 0x0627,
  158. AlefHamza = 0x0623,
  159. WawHamza = 0x0624,
  160. AlefMaksoor = 0x0625,
  161. AlefMagsora = 0x0649,
  162. HamzaNabera = 0x0626,
  163. Ba = 0x0628,
  164. Ta = 0x062A,
  165. Tha2 = 0x062B,
  166. Jeem = 0x062C,
  167. H7aa = 0x062D,
  168. Khaa2 = 0x062E,
  169. Dal = 0x062F,
  170. Thal = 0x0630,
  171. Ra2 = 0x0631,
  172. Zeen = 0x0632,
  173. Seen = 0x0633,
  174. Sheen = 0x0634,
  175. S9a = 0x0635,
  176. Dha = 0x0636,
  177. T6a = 0x0637,
  178. T6ha = 0x0638,
  179. Ain = 0x0639,
  180. Gain = 0x063A,
  181. Fa = 0x0641,
  182. Gaf = 0x0642,
  183. Kaf = 0x0643,
  184. Lam = 0x0644,
  185. Meem = 0x0645,
  186. Noon = 0x0646,
  187. Ha = 0x0647,
  188. Waw = 0x0648,
  189. Ya = 0x064A,
  190. AlefMad = 0x0622,
  191. TaMarboota = 0x0629,
  192. PersianPe = 0x067E, // Persian Letters;
  193. PersianChe = 0x0686,
  194. PersianZe = 0x0698,
  195. PersianGaf = 0x06AF,
  196. PersianGaf2 = 0x06A9
  197. }
  198. /// <summary>
  199. /// Data Structure for conversion
  200. /// </summary>
  201. internal class ArabicMapping
  202. {
  203. public int from;
  204. public int to;
  205. public ArabicMapping(int from, int to)
  206. {
  207. this.from = from;
  208. this.to = to;
  209. }
  210. }
  211. /// <summary>
  212. /// Sets up and creates the conversion table
  213. /// </summary>
  214. internal class ArabicTable
  215. {
  216. private static List<ArabicMapping> mapList;
  217. private static ArabicTable arabicMapper;
  218. /// <summary>
  219. /// Setting up the conversion table
  220. /// </summary>
  221. private ArabicTable()
  222. {
  223. mapList = new List<ArabicMapping>();
  224. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Hamza, (int)IsolatedArabicLetters.Hamza));
  225. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Alef, (int)IsolatedArabicLetters.Alef));
  226. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.AlefHamza, (int)IsolatedArabicLetters.AlefHamza));
  227. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.WawHamza, (int)IsolatedArabicLetters.WawHamza));
  228. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.AlefMaksoor, (int)IsolatedArabicLetters.AlefMaksoor));
  229. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.AlefMagsora, (int)IsolatedArabicLetters.AlefMaksora));
  230. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.HamzaNabera, (int)IsolatedArabicLetters.HamzaNabera));
  231. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ba, (int)IsolatedArabicLetters.Ba));
  232. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ta, (int)IsolatedArabicLetters.Ta));
  233. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Tha2, (int)IsolatedArabicLetters.Tha2));
  234. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Jeem, (int)IsolatedArabicLetters.Jeem));
  235. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.H7aa, (int)IsolatedArabicLetters.H7aa));
  236. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Khaa2, (int)IsolatedArabicLetters.Khaa2));
  237. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Dal, (int)IsolatedArabicLetters.Dal));
  238. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Thal, (int)IsolatedArabicLetters.Thal));
  239. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ra2, (int)IsolatedArabicLetters.Ra2));
  240. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Zeen, (int)IsolatedArabicLetters.Zeen));
  241. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Seen, (int)IsolatedArabicLetters.Seen));
  242. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Sheen, (int)IsolatedArabicLetters.Sheen));
  243. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.S9a, (int)IsolatedArabicLetters.S9a));
  244. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Dha, (int)IsolatedArabicLetters.Dha));
  245. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.T6a, (int)IsolatedArabicLetters.T6a));
  246. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.T6ha, (int)IsolatedArabicLetters.T6ha));
  247. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ain, (int)IsolatedArabicLetters.Ain));
  248. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Gain, (int)IsolatedArabicLetters.Gain));
  249. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Fa, (int)IsolatedArabicLetters.Fa));
  250. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Gaf, (int)IsolatedArabicLetters.Gaf));
  251. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Kaf, (int)IsolatedArabicLetters.Kaf));
  252. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Lam, (int)IsolatedArabicLetters.Lam));
  253. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Meem, (int)IsolatedArabicLetters.Meem));
  254. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Noon, (int)IsolatedArabicLetters.Noon));
  255. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ha, (int)IsolatedArabicLetters.Ha));
  256. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Waw, (int)IsolatedArabicLetters.Waw));
  257. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.Ya, (int)IsolatedArabicLetters.Ya));
  258. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.AlefMad, (int)IsolatedArabicLetters.AlefMad));
  259. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.TaMarboota, (int)IsolatedArabicLetters.TaMarboota));
  260. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.PersianPe, (int)IsolatedArabicLetters.PersianPe)); // Persian Letters;
  261. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.PersianChe, (int)IsolatedArabicLetters.PersianChe));
  262. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.PersianZe, (int)IsolatedArabicLetters.PersianZe));
  263. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.PersianGaf, (int)IsolatedArabicLetters.PersianGaf));
  264. mapList.Add(new ArabicMapping((int)GeneralArabicLetters.PersianGaf2, (int)IsolatedArabicLetters.PersianGaf2));
  265. //for (int i = 0; i < generalArabic.Length; i++)
  266. // mapList.Add(new ArabicMapping((int)generalArabic.GetValue(i), (int)isolatedArabic.GetValue(i))); // I
  267. }
  268. /// <summary>
  269. /// Singleton design pattern, Get the mapper. If it was not created before, create it.
  270. /// </summary>
  271. internal static ArabicTable ArabicMapper
  272. {
  273. get
  274. {
  275. if (arabicMapper == null)
  276. arabicMapper = new ArabicTable();
  277. return arabicMapper;
  278. }
  279. }
  280. internal int Convert(int toBeConverted)
  281. {
  282. foreach (ArabicMapping arabicMap in mapList)
  283. if (arabicMap.from == toBeConverted)
  284. {
  285. return arabicMap.to;
  286. }
  287. return toBeConverted;
  288. }
  289. }
  290. internal class TashkeelLocation
  291. {
  292. public char tashkeel;
  293. public int position;
  294. public TashkeelLocation(char tashkeel, int position)
  295. {
  296. this.tashkeel = tashkeel;
  297. this.position = position;
  298. }
  299. }
  300. internal class ArabicFixerTool
  301. {
  302. internal static bool showTashkeel = true;
  303. internal static bool useHinduNumbers = false;
  304. internal static string RemoveTashkeel(string str, out List<TashkeelLocation> tashkeelLocation)
  305. {
  306. tashkeelLocation = new List<TashkeelLocation>();
  307. char[] letters = str.ToCharArray();
  308. int index = 0;
  309. for (int i = 0; i < letters.Length; i++) {
  310. if (letters [i] == (char)0x064B) { // Tanween Fatha
  311. tashkeelLocation.Add (new TashkeelLocation ((char)0x064B, i));
  312. index++;
  313. } else if (letters [i] == (char)0x064C) { // DAMMATAN
  314. tashkeelLocation.Add (new TashkeelLocation ((char)0x064C, i));
  315. index++;
  316. } else if (letters [i] == (char)0x064D){ // KASRATAN
  317. tashkeelLocation.Add (new TashkeelLocation ((char)0x064D, i));
  318. index++;
  319. }else if (letters [i] == (char)0x064E) { // FATHA
  320. if(index > 0)
  321. {
  322. if(tashkeelLocation[index-1].tashkeel == (char)0x0651 ) // SHADDA
  323. {
  324. tashkeelLocation [index - 1].tashkeel = (char)0xFC60; // Shadda With Fatha
  325. continue;
  326. }
  327. }
  328. tashkeelLocation.Add (new TashkeelLocation ((char)0x064E, i));
  329. index++;
  330. } else if (letters [i] == (char)0x064F) { // DAMMA
  331. if (index > 0) {
  332. if (tashkeelLocation [index - 1].tashkeel == (char)0x0651) { // SHADDA
  333. tashkeelLocation [index - 1].tashkeel = (char)0xFC61; // Shadda With DAMMA
  334. continue;
  335. }
  336. }
  337. tashkeelLocation.Add (new TashkeelLocation ((char)0x064F, i));
  338. index++;
  339. } else if (letters [i] == (char)0x0650) { // KASRA
  340. if (index > 0) {
  341. if (tashkeelLocation [index - 1].tashkeel == (char)0x0651) { // SHADDA
  342. tashkeelLocation [index - 1].tashkeel = (char)0xFC62; // Shadda With KASRA
  343. continue;
  344. }
  345. }
  346. tashkeelLocation.Add (new TashkeelLocation ((char)0x0650, i));
  347. index++;
  348. } else if (letters [i] == (char)0x0651) { // SHADDA
  349. if(index > 0)
  350. {
  351. if(tashkeelLocation[index-1].tashkeel == (char)0x064E ) // FATHA
  352. {
  353. tashkeelLocation [index - 1].tashkeel = (char)0xFC60; // Shadda With Fatha
  354. continue;
  355. }
  356. if(tashkeelLocation[index-1].tashkeel == (char)0x064F ) // DAMMA
  357. {
  358. tashkeelLocation [index - 1].tashkeel = (char)0xFC61; // Shadda With DAMMA
  359. continue;
  360. }
  361. if(tashkeelLocation[index-1].tashkeel == (char)0x0650 ) // KASRA
  362. {
  363. tashkeelLocation [index - 1].tashkeel = (char)0xFC62; // Shadda With KASRA
  364. continue;
  365. }
  366. }
  367. tashkeelLocation.Add (new TashkeelLocation ((char)0x0651, i));
  368. index++;
  369. } else if (letters [i] == (char)0x0652) { // SUKUN
  370. tashkeelLocation.Add (new TashkeelLocation ((char)0x0652, i));
  371. index++;
  372. } else if (letters [i] == (char)0x0653) { // MADDAH ABOVE
  373. tashkeelLocation.Add (new TashkeelLocation ((char)0x0653, i));
  374. index++;
  375. }
  376. }
  377. string[] split = str.Split(new char[]{(char)0x064B,(char)0x064C,(char)0x064D,
  378. (char)0x064E,(char)0x064F,(char)0x0650,
  379. (char)0x0651,(char)0x0652,(char)0x0653,(char)0xFC60,(char)0xFC61,(char)0xFC62});
  380. str = "";
  381. foreach(string s in split)
  382. {
  383. str += s;
  384. }
  385. return str;
  386. }
  387. internal static char[] ReturnTashkeel(char[] letters, List<TashkeelLocation> tashkeelLocation)
  388. {
  389. char[] lettersWithTashkeel = new char[letters.Length + tashkeelLocation.Count];
  390. int letterWithTashkeelTracker = 0;
  391. for(int i = 0; i<letters.Length; i++)
  392. {
  393. lettersWithTashkeel[letterWithTashkeelTracker] = letters[i];
  394. letterWithTashkeelTracker++;
  395. foreach(TashkeelLocation hLocation in tashkeelLocation)
  396. {
  397. if(hLocation.position == letterWithTashkeelTracker)
  398. {
  399. lettersWithTashkeel[letterWithTashkeelTracker] = hLocation.tashkeel;
  400. letterWithTashkeelTracker++;
  401. }
  402. }
  403. }
  404. return lettersWithTashkeel;
  405. }
  406. /// <summary>
  407. /// Converts a string to a form in which the sting will be displayed correctly for arabic text.
  408. /// </summary>
  409. /// <param name="str">String to be converted. Example: "Aaa"</param>
  410. /// <returns>Converted string. Example: "aa aaa A" without the spaces.</returns>
  411. internal static string FixLine(string str)
  412. {
  413. string test = "";
  414. List<TashkeelLocation> tashkeelLocation;
  415. string originString = RemoveTashkeel(str, out tashkeelLocation);
  416. char[] lettersOrigin = originString.ToCharArray();
  417. char[] lettersFinal = originString.ToCharArray();
  418. for (int i = 0; i < lettersOrigin.Length; i++)
  419. {
  420. lettersOrigin[i] = (char)ArabicTable.ArabicMapper.Convert(lettersOrigin[i]);
  421. }
  422. for (int i = 0; i < lettersOrigin.Length; i++)
  423. {
  424. bool skip = false;
  425. //lettersOrigin[i] = (char)ArabicTable.ArabicMapper.Convert(lettersOrigin[i]);
  426. // For special Lam Letter connections.
  427. if (lettersOrigin[i] == (char)IsolatedArabicLetters.Lam)
  428. {
  429. if (i < lettersOrigin.Length - 1)
  430. {
  431. //lettersOrigin[i + 1] = (char)ArabicTable.ArabicMapper.Convert(lettersOrigin[i + 1]);
  432. if ((lettersOrigin[i + 1] == (char)IsolatedArabicLetters.AlefMaksoor))
  433. {
  434. lettersOrigin[i] = (char)0xFEF7;
  435. lettersFinal[i + 1] = (char)0xFFFF;
  436. skip = true;
  437. }
  438. else if ((lettersOrigin[i + 1] == (char)IsolatedArabicLetters.Alef))
  439. {
  440. lettersOrigin[i] = (char)0xFEF9;
  441. lettersFinal[i + 1] = (char)0xFFFF;
  442. skip = true;
  443. }
  444. else if ((lettersOrigin[i + 1] == (char)IsolatedArabicLetters.AlefHamza))
  445. {
  446. lettersOrigin[i] = (char)0xFEF5;
  447. lettersFinal[i + 1] = (char)0xFFFF;
  448. skip = true;
  449. }
  450. else if ((lettersOrigin[i + 1] == (char)IsolatedArabicLetters.AlefMad))
  451. {
  452. lettersOrigin[i] = (char)0xFEF3;
  453. lettersFinal[i + 1] = (char)0xFFFF;
  454. skip = true;
  455. }
  456. }
  457. }
  458. if (!IsIgnoredCharacter(lettersOrigin[i]))
  459. {
  460. if (IsMiddleLetter(lettersOrigin, i))
  461. lettersFinal[i] = (char)(lettersOrigin[i] + 3);
  462. else if (IsFinishingLetter(lettersOrigin, i))
  463. lettersFinal[i] = (char)(lettersOrigin[i] + 1);
  464. else if (IsLeadingLetter(lettersOrigin, i))
  465. lettersFinal[i] = (char)(lettersOrigin[i] + 2);
  466. }
  467. //string strOut = String.Format(@"\x{0:x4}", (ushort)lettersOrigin[i]);
  468. //UnityEngine.Debug.Log(strOut);
  469. //strOut = String.Format(@"\x{0:x4}", (ushort)lettersFinal[i]);
  470. //UnityEngine.Debug.Log(strOut);
  471. test += Convert.ToString((int)lettersOrigin[i], 16) + " ";
  472. if (skip)
  473. i++;
  474. //chaning numbers to hindu
  475. if(useHinduNumbers){
  476. if(lettersOrigin[i] == (char)0x0030)
  477. lettersFinal[i] = (char)0x0660;
  478. else if(lettersOrigin[i] == (char)0x0031)
  479. lettersFinal[i] = (char)0x0661;
  480. else if(lettersOrigin[i] == (char)0x0032)
  481. lettersFinal[i] = (char)0x0662;
  482. else if(lettersOrigin[i] == (char)0x0033)
  483. lettersFinal[i] = (char)0x0663;
  484. else if(lettersOrigin[i] == (char)0x0034)
  485. lettersFinal[i] = (char)0x0664;
  486. else if(lettersOrigin[i] == (char)0x0035)
  487. lettersFinal[i] = (char)0x0665;
  488. else if(lettersOrigin[i] == (char)0x0036)
  489. lettersFinal[i] = (char)0x0666;
  490. else if(lettersOrigin[i] == (char)0x0037)
  491. lettersFinal[i] = (char)0x0667;
  492. else if(lettersOrigin[i] == (char)0x0038)
  493. lettersFinal[i] = (char)0x0668;
  494. else if(lettersOrigin[i] == (char)0x0039)
  495. lettersFinal[i] = (char)0x0669;
  496. }
  497. }
  498. //Return the Tashkeel to their places.
  499. if(showTashkeel)
  500. lettersFinal = ReturnTashkeel(lettersFinal, tashkeelLocation);
  501. List<char> list = new List<char>();
  502. List<char> numberList = new List<char>();
  503. for (int i = lettersFinal.Length - 1; i >= 0; i--)
  504. {
  505. // if (lettersFinal[i] == '(')
  506. // numberList.Add(')');
  507. // else if (lettersFinal[i] == ')')
  508. // numberList.Add('(');
  509. // else if (lettersFinal[i] == '<')
  510. // numberList.Add('>');
  511. // else if (lettersFinal[i] == '>')
  512. // numberList.Add('<');
  513. // else
  514. if (char.IsPunctuation(lettersFinal[i]) && i>0 && i < lettersFinal.Length-1 &&
  515. (char.IsPunctuation(lettersFinal[i-1]) || char.IsPunctuation(lettersFinal[i+1])))
  516. {
  517. if (lettersFinal[i] == '(')
  518. list.Add(')');
  519. else if (lettersFinal[i] == ')')
  520. list.Add('(');
  521. else if (lettersFinal[i] == '<')
  522. list.Add('>');
  523. else if (lettersFinal[i] == '>')
  524. list.Add('<');
  525. else if (lettersFinal[i] == '[')
  526. list.Add(']');
  527. else if (lettersFinal[i] == ']')
  528. list.Add('[');
  529. else if (lettersFinal[i] != 0xFFFF)
  530. list.Add(lettersFinal[i]);
  531. }
  532. // For cases where english words and arabic are mixed. This allows for using arabic, english and numbers in one sentence.
  533. else if(lettersFinal[i] == ' ' && i > 0 && i < lettersFinal.Length-1 &&
  534. (char.IsLower(lettersFinal[i-1]) || char.IsUpper(lettersFinal[i-1]) || char.IsNumber(lettersFinal[i-1])) &&
  535. (char.IsLower(lettersFinal[i+1]) || char.IsUpper(lettersFinal[i+1]) ||char.IsNumber(lettersFinal[i+1])))
  536. {
  537. numberList.Add(lettersFinal[i]);
  538. }
  539. else if (char.IsNumber(lettersFinal[i]) || char.IsLower(lettersFinal[i]) ||
  540. char.IsUpper(lettersFinal[i]) || char.IsSymbol(lettersFinal[i]) ||
  541. char.IsPunctuation(lettersFinal[i]))// || lettersFinal[i] == '^') //)
  542. {
  543. if (lettersFinal[i] == '(')
  544. numberList.Add(')');
  545. else if (lettersFinal[i] == ')')
  546. numberList.Add('(');
  547. else if (lettersFinal[i] == '<')
  548. numberList.Add('>');
  549. else if (lettersFinal[i] == '>')
  550. numberList.Add('<');
  551. else if (lettersFinal[i] == '[')
  552. list.Add(']');
  553. else if (lettersFinal[i] == ']')
  554. list.Add('[');
  555. else
  556. numberList.Add(lettersFinal[i]);
  557. }
  558. else if( (lettersFinal[i] >= (char)0xD800 && lettersFinal[i] <= (char)0xDBFF) ||
  559. (lettersFinal[i] >= (char)0xDC00 && lettersFinal[i] <= (char)0xDFFF))
  560. {
  561. numberList.Add(lettersFinal[i]);
  562. }
  563. else
  564. {
  565. if (numberList.Count > 0)
  566. {
  567. for (int j = 0; j < numberList.Count; j++)
  568. list.Add(numberList[numberList.Count - 1 - j]);
  569. numberList.Clear();
  570. }
  571. if (lettersFinal[i] != 0xFFFF)
  572. list.Add(lettersFinal[i]);
  573. }
  574. }
  575. if (numberList.Count > 0)
  576. {
  577. for (int j = 0; j < numberList.Count; j++)
  578. list.Add(numberList[numberList.Count - 1 - j]);
  579. numberList.Clear();
  580. }
  581. // Moving letters from a list to an array.
  582. lettersFinal = new char[list.Count];
  583. for (int i = 0; i < lettersFinal.Length; i++)
  584. lettersFinal[i] = list[i];
  585. str = new string(lettersFinal);
  586. return str;
  587. }
  588. /// <summary>
  589. /// English letters, numbers and punctuation characters are ignored. This checks if the ch is an ignored character.
  590. /// </summary>
  591. /// <param name="ch">The character to be checked for skipping</param>
  592. /// <returns>True if the character should be ignored, false if it should not be ignored.</returns>
  593. internal static bool IsIgnoredCharacter(char ch)
  594. {
  595. bool isPunctuation = char.IsPunctuation(ch);
  596. bool isNumber = char.IsNumber(ch);
  597. bool isLower = char.IsLower(ch);
  598. bool isUpper = char.IsUpper(ch);
  599. bool isSymbol = char.IsSymbol(ch);
  600. bool isPersianCharacter = ch == (char)0xFB56 || ch == (char)0xFB7A || ch == (char)0xFB8A || ch == (char)0xFB92 || ch == (char)0xFB8E;
  601. bool isPresentationFormB = (ch <= (char)0xFEFF && ch >= (char)0xFE70);
  602. bool isAcceptableCharacter = isPresentationFormB || isPersianCharacter || ch == (char)0xFBFC;
  603. return isPunctuation ||
  604. isNumber ||
  605. isLower ||
  606. isUpper ||
  607. isSymbol ||
  608. !isAcceptableCharacter ||
  609. ch == 'a' || ch == '>' || ch == '<' || ch == (char)0x061B;
  610. // return char.IsPunctuation(ch) || char.IsNumber(ch) || ch == 'a' || ch == '>' || ch == '<' ||
  611. // char.IsLower(ch) || char.IsUpper(ch) || ch == (char)0x061B || char.IsSymbol(ch)
  612. // || !(ch <= (char)0xFEFF && ch >= (char)0xFE70) // Presentation Form B
  613. // || ch == (char)0xFB56 || ch == (char)0xFB7A || ch == (char)0xFB8A || ch == (char)0xFB92; // Persian Characters
  614. // PersianPe = 0xFB56,
  615. // PersianChe = 0xFB7A,
  616. // PersianZe = 0xFB8A,
  617. // PersianGaf = 0xFB92
  618. //lettersOrigin[i] <= (char)0xFEFF && lettersOrigin[i] >= (char)0xFE70
  619. }
  620. /// <summary>
  621. /// Checks if the letter at index value is a leading character in Arabic or not.
  622. /// </summary>
  623. /// <param name="letters">The whole word that contains the character to be checked</param>
  624. /// <param name="index">The index of the character to be checked</param>
  625. /// <returns>True if the character at index is a leading character, else, returns false</returns>
  626. internal static bool IsLeadingLetter(char[] letters, int index)
  627. {
  628. bool lettersThatCannotBeBeforeALeadingLetter = index == 0
  629. || letters[index - 1] == ' '
  630. || letters[index - 1] == '*' // ??? Remove?
  631. || letters[index - 1] == 'A' // ??? Remove?
  632. || char.IsPunctuation(letters[index - 1])
  633. || letters[index - 1] == '>'
  634. || letters[index - 1] == '<'
  635. || letters[index - 1] == (int)IsolatedArabicLetters.Alef
  636. || letters[index - 1] == (int)IsolatedArabicLetters.Dal
  637. || letters[index - 1] == (int)IsolatedArabicLetters.Thal
  638. || letters[index - 1] == (int)IsolatedArabicLetters.Ra2
  639. || letters[index - 1] == (int)IsolatedArabicLetters.Zeen
  640. || letters[index - 1] == (int)IsolatedArabicLetters.PersianZe
  641. //|| letters[index - 1] == (int)IsolatedArabicLetters.AlefMaksora
  642. || letters[index - 1] == (int)IsolatedArabicLetters.Waw
  643. || letters[index - 1] == (int)IsolatedArabicLetters.AlefMad
  644. || letters[index - 1] == (int)IsolatedArabicLetters.AlefHamza
  645. || letters[index - 1] == (int)IsolatedArabicLetters.Hamza
  646. || letters[index - 1] == (int)IsolatedArabicLetters.AlefMaksoor
  647. || letters[index - 1] == (int)IsolatedArabicLetters.WawHamza;
  648. bool lettersThatCannotBeALeadingLetter = letters[index] != ' '
  649. && letters[index] != (int)IsolatedArabicLetters.Dal
  650. && letters[index] != (int)IsolatedArabicLetters.Thal
  651. && letters[index] != (int)IsolatedArabicLetters.Ra2
  652. && letters[index] != (int)IsolatedArabicLetters.Zeen
  653. && letters[index] != (int)IsolatedArabicLetters.PersianZe
  654. && letters[index] != (int)IsolatedArabicLetters.Alef
  655. && letters[index] != (int)IsolatedArabicLetters.AlefHamza
  656. && letters[index] != (int)IsolatedArabicLetters.AlefMaksoor
  657. && letters[index] != (int)IsolatedArabicLetters.AlefMad
  658. && letters[index] != (int)IsolatedArabicLetters.WawHamza
  659. && letters[index] != (int)IsolatedArabicLetters.Waw
  660. && letters[index] != (int)IsolatedArabicLetters.Hamza;
  661. bool lettersThatCannotBeAfterLeadingLetter = index < letters.Length - 1
  662. && letters[index + 1] != ' '
  663. && !char.IsPunctuation(letters[index + 1] )
  664. && !char.IsNumber(letters[index + 1])
  665. && !char.IsSymbol(letters[index + 1])
  666. && !char.IsLower(letters[index + 1])
  667. && !char.IsUpper(letters[index + 1])
  668. && letters[index + 1] != (int)IsolatedArabicLetters.Hamza;
  669. if(lettersThatCannotBeBeforeALeadingLetter && lettersThatCannotBeALeadingLetter && lettersThatCannotBeAfterLeadingLetter)
  670. // if ((index == 0 || letters[index - 1] == ' ' || letters[index - 1] == '*' || letters[index - 1] == 'A' || char.IsPunctuation(letters[index - 1])
  671. // || letters[index - 1] == '>' || letters[index - 1] == '<'
  672. // || letters[index - 1] == (int)IsolatedArabicLetters.Alef
  673. // || letters[index - 1] == (int)IsolatedArabicLetters.Dal || letters[index - 1] == (int)IsolatedArabicLetters.Thal
  674. // || letters[index - 1] == (int)IsolatedArabicLetters.Ra2
  675. // || letters[index - 1] == (int)IsolatedArabicLetters.Zeen || letters[index - 1] == (int)IsolatedArabicLetters.PersianZe
  676. // || letters[index - 1] == (int)IsolatedArabicLetters.AlefMaksora || letters[index - 1] == (int)IsolatedArabicLetters.Waw
  677. // || letters[index - 1] == (int)IsolatedArabicLetters.AlefMad || letters[index - 1] == (int)IsolatedArabicLetters.AlefHamza
  678. // || letters[index - 1] == (int)IsolatedArabicLetters.AlefMaksoor || letters[index - 1] == (int)IsolatedArabicLetters.WawHamza)
  679. // && letters[index] != ' ' && letters[index] != (int)IsolatedArabicLetters.Dal
  680. // && letters[index] != (int)IsolatedArabicLetters.Thal
  681. // && letters[index] != (int)IsolatedArabicLetters.Ra2
  682. // && letters[index] != (int)IsolatedArabicLetters.Zeen && letters[index] != (int)IsolatedArabicLetters.PersianZe
  683. // && letters[index] != (int)IsolatedArabicLetters.Alef && letters[index] != (int)IsolatedArabicLetters.AlefHamza
  684. // && letters[index] != (int)IsolatedArabicLetters.AlefMaksoor
  685. // && letters[index] != (int)IsolatedArabicLetters.AlefMad
  686. // && letters[index] != (int)IsolatedArabicLetters.WawHamza
  687. // && letters[index] != (int)IsolatedArabicLetters.Waw
  688. // && letters[index] != (int)IsolatedArabicLetters.Hamza
  689. // && index < letters.Length - 1 && letters[index + 1] != ' ' && !char.IsPunctuation(letters[index + 1] ) && !char.IsNumber(letters[index + 1])
  690. // && letters[index + 1] != (int)IsolatedArabicLetters.Hamza )
  691. {
  692. return true;
  693. }
  694. else
  695. return false;
  696. }
  /// <summary>
  /// Checks if the letter at index value is a finishing character in Arabic or not.
  /// The decision is based on the letter itself and on the character immediately before it.
  /// </summary>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>
  /// True if the character at index is a finishing character; false otherwise, including when
  /// index is 0 (there is no preceding character) or lies outside <paramref name="letters"/>.
  /// </returns>
  internal static bool IsFinishingLetter(char[] letters, int index)
  {
      // A finishing letter needs a predecessor; the guard also keeps both array reads in range.
      if (index <= 0 || index >= letters.Length)
      {
          return false;
      }

      char current = letters[index];

      // A space or an isolated Hamza is never reported as a finishing letter.
      if (current == ' ' || current == (int)IsolatedArabicLetters.Hamza)
      {
          return false;
      }

      char previous = letters[index - 1];

      // Characters that, when they come right before the candidate, rule it out.
      // '<' and '>' are tested explicitly in addition to char.IsPunctuation.
      // AlefMaksora is intentionally absent from this list (its check was disabled in the original code).
      bool previousRulesOutFinishing =
          previous == ' '
          || previous == '>'
          || previous == '<'
          || char.IsPunctuation(previous)
          || previous == (int)IsolatedArabicLetters.Dal
          || previous == (int)IsolatedArabicLetters.Thal
          || previous == (int)IsolatedArabicLetters.Ra2
          || previous == (int)IsolatedArabicLetters.Zeen
          || previous == (int)IsolatedArabicLetters.PersianZe
          || previous == (int)IsolatedArabicLetters.Waw
          || previous == (int)IsolatedArabicLetters.Alef
          || previous == (int)IsolatedArabicLetters.AlefMad
          || previous == (int)IsolatedArabicLetters.AlefHamza
          || previous == (int)IsolatedArabicLetters.AlefMaksoor
          || previous == (int)IsolatedArabicLetters.WawHamza
          || previous == (int)IsolatedArabicLetters.Hamza;

      return !previousRulesOutFinishing;
  }
  /// <summary>
  /// Checks if the letter at index value is a middle character in Arabic or not.
  /// The decision is based on the letter itself and on the characters immediately
  /// before and after it.
  /// </summary>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>
  /// True if the character at index is a middle character; false otherwise, including when
  /// index is the first or last position of <paramref name="letters"/> or lies outside it.
  /// </returns>
  internal static bool IsMiddleLetter(char[] letters, int index)
  {
      // A middle letter needs a character on each side; this guard also keeps
      // every array read below in range, so no exception handling is required.
      if (index <= 0 || index >= letters.Length - 1)
      {
          return false;
      }

      char previous = letters[index - 1];
      char current = letters[index];
      char next = letters[index + 1];

      // Neither the candidate nor its predecessor may be one of the excluded isolated forms.
      if (IsExcludedFromMiddleJoin(current) || IsExcludedFromMiddleJoin(previous))
      {
          return false;
      }

      // The predecessor must not be punctuation, an angle bracket, a space or an asterisk.
      bool previousRulesOutMiddle =
          char.IsPunctuation(previous)
          || previous == '>'
          || previous == '<'
          || previous == ' '
          || previous == '*';
      if (previousRulesOutMiddle)
      {
          return false;
      }

      // The successor must not be a space, a carriage return, an isolated Hamza,
      // a number, a symbol or punctuation.
      bool nextRulesOutMiddle =
          next == ' '
          || next == '\r'
          || next == (int)IsolatedArabicLetters.Hamza
          || char.IsNumber(next)
          || char.IsSymbol(next)
          || char.IsPunctuation(next);

      return !nextRulesOutMiddle;
  }

  /// <summary>
  /// Tells whether a character is one of the isolated letter forms that IsMiddleLetter
  /// rejects both as the candidate letter and as the letter preceding it.
  /// AlefMaksora is intentionally not part of this set (its check was disabled in the original code).
  /// </summary>
  /// <param name="letter">The character to test</param>
  /// <returns>True if the character belongs to the excluded set, else false</returns>
  private static bool IsExcludedFromMiddleJoin(char letter)
  {
      return letter == (int)IsolatedArabicLetters.Alef
          || letter == (int)IsolatedArabicLetters.Dal
          || letter == (int)IsolatedArabicLetters.Thal
          || letter == (int)IsolatedArabicLetters.Ra2
          || letter == (int)IsolatedArabicLetters.Zeen
          || letter == (int)IsolatedArabicLetters.PersianZe
          || letter == (int)IsolatedArabicLetters.Waw
          || letter == (int)IsolatedArabicLetters.AlefMad
          || letter == (int)IsolatedArabicLetters.AlefHamza
          || letter == (int)IsolatedArabicLetters.AlefMaksoor
          || letter == (int)IsolatedArabicLetters.WawHamza
          || letter == (int)IsolatedArabicLetters.Hamza;
  }
  848. }