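// List of HTML entities and their Unicode character codes, for use in a parser.
// If you don't need the TCHAR stuff (Windows-specific), just remove it with a regex.
//
// Here's an example (the character class includes digits so that names such as
// "sup1", "frac14" and "there4" are matched as well):
// search expr: _T("\([A-Za-z0-9]*\)"),
// replace expr: "\1",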
// One row of the entity table: an entity name paired with the character
// code it stands for.
struct HtmlEntity
{
    const TCHAR *name;  // entity name as stored in the table, without '&' or ';' (e.g. "amp")
    WCHAR ch;           // character code the entity maps to
};
// Table mapping HTML entity names to their character codes.
//
// The array bound is deliberately left for the compiler to deduce: the
// initializer list holds 253 rows, one more than the previously hard-coded
// bound of 252, which made the definition ill-formed (too many initializers).
// Use sizeof(entities) / sizeof(entities[0]) to obtain the number of rows
// instead of hard-coding a count.
//
// Names are stored without the leading '&' and trailing ';'. Entity names are
// case-sensitive (e.g. "Prime" vs "prime", "lArr" vs "larr"), so lookups must
// compare names exactly.
HtmlEntity entities[] =
{
    // --- markup-significant characters ---
    { _T("amp"), '&' }, // ampersand
    { _T("gt"), '>' }, // greater than / right angle bracket
    { _T("lt"), '<' }, // less than / left angle bracket
    { _T("quot"), '"' }, // double quote
    { _T("apos"), '\'' }, // single quote
    { _T("nbsp"), 160 }, // non breaking space (in some apps, maybe this should be 0x20)

    // --- accented and special Latin letters ---
    { _T("AElig"), 198 }, // capital AE diphthong (ligature)
    { _T("Aacute"), 193 }, // capital A, acute accent
    { _T("Acirc"), 194 }, // capital A, circumflex accent
    { _T("Agrave"), 192 }, // capital A, grave accent
    { _T("Aring"), 197 }, // capital A, ring
    { _T("Atilde"), 195 }, // capital A, tilde
    { _T("Auml"), 196 }, // capital A, dieresis or umlaut mark
    { _T("Ccedil"), 199 }, // capital C, cedilla
    { _T("ETH"), 208 }, // capital Eth, Icelandic
    { _T("Eacute"), 201 }, // capital E, acute accent
    { _T("Ecirc"), 202 }, // capital E, circumflex accent
    { _T("Egrave"), 200 }, // capital E, grave accent
    { _T("Euml"), 203 }, // capital E, dieresis or umlaut mark
    { _T("Iacute"), 205 }, // capital I, acute accent
    { _T("Icirc"), 206 }, // capital I, circumflex accent
    { _T("Igrave"), 204 }, // capital I, grave accent
    { _T("Iuml"), 207 }, // capital I, dieresis or umlaut mark
    { _T("Ntilde"), 209 }, // capital N, tilde
    { _T("Oacute"), 211 }, // capital O, acute accent
    { _T("Ocirc"), 212 }, // capital O, circumflex accent
    { _T("Ograve"), 210 }, // capital O, grave accent
    { _T("Oslash"), 216 }, // capital O, slash
    { _T("Otilde"), 213 }, // capital O, tilde
    { _T("Ouml"), 214 }, // capital O, dieresis or umlaut mark
    { _T("THORN"), 222 }, // capital THORN, Icelandic
    { _T("Uacute"), 218 }, // capital U, acute accent
    { _T("Ucirc"), 219 }, // capital U, circumflex accent
    { _T("Ugrave"), 217 }, // capital U, grave accent
    { _T("Uuml"), 220 }, // capital U, dieresis or umlaut mark
    { _T("Yacute"), 221 }, // capital Y, acute accent
    { _T("aacute"), 225 }, // small a, acute accent
    { _T("acirc"), 226 }, // small a, circumflex accent
    { _T("aelig"), 230 }, // small ae diphthong (ligature)
    { _T("agrave"), 224 }, // small a, grave accent
    { _T("aring"), 229 }, // small a, ring
    { _T("atilde"), 227 }, // small a, tilde
    { _T("auml"), 228 }, // small a, dieresis or umlaut mark
    { _T("ccedil"), 231 }, // small c, cedilla
    { _T("eacute"), 233 }, // small e, acute accent
    { _T("ecirc"), 234 }, // small e, circumflex accent
    { _T("egrave"), 232 }, // small e, grave accent
    { _T("eth"), 240 }, // small eth, Icelandic
    { _T("euml"), 235 }, // small e, dieresis or umlaut mark
    { _T("iacute"), 237 }, // small i, acute accent
    { _T("icirc"), 238 }, // small i, circumflex accent
    { _T("igrave"), 236 }, // small i, grave accent
    { _T("iuml"), 239 }, // small i, dieresis or umlaut mark
    { _T("ntilde"), 241 }, // small n, tilde
    { _T("oacute"), 243 }, // small o, acute accent
    { _T("ocirc"), 244 }, // small o, circumflex accent
    { _T("ograve"), 242 }, // small o, grave accent
    { _T("oslash"), 248 }, // small o, slash
    { _T("otilde"), 245 }, // small o, tilde
    { _T("ouml"), 246 }, // small o, dieresis or umlaut mark
    { _T("szlig"), 223 }, // small sharp s, German (sz ligature)
    { _T("thorn"), 254 }, // small thorn, Icelandic
    { _T("uacute"), 250 }, // small u, acute accent
    { _T("ucirc"), 251 }, // small u, circumflex accent
    { _T("ugrave"), 249 }, // small u, grave accent
    { _T("uuml"), 252 }, // small u, dieresis or umlaut mark
    { _T("yacute"), 253 }, // small y, acute accent
    { _T("yuml"), 255 }, // small y, dieresis or umlaut mark

    // --- remaining symbols in the 161..255 range ---
    { _T("copy"), 169 }, // copyright sign
    { _T("reg"), 174 }, // registered sign
    { _T("iexcl"), 161 }, // inverted exclamation mark
    { _T("cent"), 162 }, // cent sign
    { _T("pound"), 163 }, // pound sign
    { _T("curren"), 164 }, // currency sign
    { _T("yen"), 165 }, // yen sign
    { _T("brvbar"), 166 }, // broken vertical bar
    { _T("sect"), 167 }, // section sign
    { _T("uml"), 168 }, // dieresis (spacing umlaut)
    { _T("ordf"), 170 }, // feminine ordinal indicator
    { _T("laquo"), 171 }, // left-pointing double angle quotation mark
    { _T("not"), 172 }, // not sign
    { _T("shy"), 173 }, // soft hyphen
    { _T("macr"), 175 }, // macron
    { _T("deg"), 176 }, // degree sign
    { _T("plusmn"), 177 }, // plus-minus sign
    { _T("sup1"), 185 }, // superscript one
    { _T("sup2"), 178 }, // superscript two
    { _T("sup3"), 179 }, // superscript three
    { _T("acute"), 180 }, // acute accent
    { _T("micro"), 181 }, // micro sign
    { _T("para"), 182 }, // pilcrow (paragraph sign)
    { _T("middot"), 183 }, // middle dot
    { _T("cedil"), 184 }, // cedilla
    { _T("ordm"), 186 }, // masculine ordinal indicator
    { _T("raquo"), 187 }, // right-pointing double angle quotation mark
    { _T("frac14"), 188 }, // fraction one quarter
    { _T("frac12"), 189 }, // fraction one half
    { _T("frac34"), 190 }, // fraction three quarters
    { _T("iquest"), 191 }, // inverted question mark
    { _T("times"), 215 }, // multiplication sign
    { _T("divide"), 247 }, // division sign

    // --- additional Latin letters and spacing modifiers ---
    { _T("OElig"), 338 }, // capital ligature OE
    { _T("oelig"), 339 }, // small ligature oe
    { _T("Scaron"), 352 }, // capital S, caron
    { _T("scaron"), 353 }, // small s, caron
    { _T("Yuml"), 376 }, // capital Y, dieresis or umlaut mark
    { _T("fnof"), 402 }, // small f with hook (function sign)
    { _T("circ"), 710 }, // modifier letter circumflex accent
    { _T("tilde"), 732 }, // small tilde

    // --- Greek letters ---
    { _T("Alpha"), 913 }, // Greek capital alpha
    { _T("Beta"), 914 }, // Greek capital beta
    { _T("Gamma"), 915 }, // Greek capital gamma
    { _T("Delta"), 916 }, // Greek capital delta
    { _T("Epsilon"), 917 }, // Greek capital epsilon
    { _T("Zeta"), 918 }, // Greek capital zeta
    { _T("Eta"), 919 }, // Greek capital eta
    { _T("Theta"), 920 }, // Greek capital theta
    { _T("Iota"), 921 }, // Greek capital iota
    { _T("Kappa"), 922 }, // Greek capital kappa
    { _T("Lambda"), 923 }, // Greek capital lambda
    { _T("Mu"), 924 }, // Greek capital mu
    { _T("Nu"), 925 }, // Greek capital nu
    { _T("Xi"), 926 }, // Greek capital xi
    { _T("Omicron"), 927 }, // Greek capital omicron
    { _T("Pi"), 928 }, // Greek capital pi
    { _T("Rho"), 929 }, // Greek capital rho
    { _T("Sigma"), 931 }, // Greek capital sigma
    { _T("Tau"), 932 }, // Greek capital tau
    { _T("Upsilon"), 933 }, // Greek capital upsilon
    { _T("Phi"), 934 }, // Greek capital phi
    { _T("Chi"), 935 }, // Greek capital chi
    { _T("Psi"), 936 }, // Greek capital psi
    { _T("Omega"), 937 }, // Greek capital omega
    { _T("alpha"), 945 }, // Greek small alpha
    { _T("beta"), 946 }, // Greek small beta
    { _T("gamma"), 947 }, // Greek small gamma
    { _T("delta"), 948 }, // Greek small delta
    { _T("epsilon"), 949 }, // Greek small epsilon
    { _T("zeta"), 950 }, // Greek small zeta
    { _T("eta"), 951 }, // Greek small eta
    { _T("theta"), 952 }, // Greek small theta
    { _T("iota"), 953 }, // Greek small iota
    { _T("kappa"), 954 }, // Greek small kappa
    { _T("lambda"), 955 }, // Greek small lambda
    { _T("mu"), 956 }, // Greek small mu
    { _T("nu"), 957 }, // Greek small nu
    { _T("xi"), 958 }, // Greek small xi
    { _T("omicron"), 959 }, // Greek small omicron
    { _T("pi"), 960 }, // Greek small pi
    { _T("rho"), 961 }, // Greek small rho
    { _T("sigmaf"), 962 }, // Greek small final sigma
    { _T("sigma"), 963 }, // Greek small sigma
    { _T("tau"), 964 }, // Greek small tau
    { _T("upsilon"), 965 }, // Greek small upsilon
    { _T("phi"), 966 }, // Greek small phi
    { _T("chi"), 967 }, // Greek small chi
    { _T("psi"), 968 }, // Greek small psi
    { _T("omega"), 969 }, // Greek small omega
    { _T("thetasym"), 977 }, // Greek theta symbol
    { _T("upsih"), 978 }, // Greek upsilon with hook symbol
    { _T("piv"), 982 }, // Greek pi symbol

    // --- spaces, directional marks and general punctuation ---
    { _T("ensp"), 8194 }, // en space
    { _T("emsp"), 8195 }, // em space
    { _T("thinsp"), 8201 }, // thin space
    { _T("zwnj"), 8204 }, // zero width non-joiner
    { _T("zwj"), 8205 }, // zero width joiner
    { _T("lrm"), 8206 }, // left-to-right mark
    { _T("rlm"), 8207 }, // right-to-left mark
    { _T("ndash"), 8211 }, // en dash
    { _T("mdash"), 8212 }, // em dash
    { _T("lsquo"), 8216 }, // left single quotation mark
    { _T("rsquo"), 8217 }, // right single quotation mark
    { _T("sbquo"), 8218 }, // single low-9 quotation mark
    { _T("ldquo"), 8220 }, // left double quotation mark
    { _T("rdquo"), 8221 }, // right double quotation mark
    { _T("bdquo"), 8222 }, // double low-9 quotation mark
    { _T("dagger"), 8224 }, // dagger
    { _T("Dagger"), 8225 }, // double dagger
    { _T("bull"), 8226 }, // bullet
    { _T("hellip"), 8230 }, // horizontal ellipsis
    { _T("permil"), 8240 }, // per mille sign
    { _T("prime"), 8242 }, // prime
    { _T("Prime"), 8243 }, // double prime
    { _T("lsaquo"), 8249 }, // single left-pointing angle quotation mark
    { _T("rsaquo"), 8250 }, // single right-pointing angle quotation mark
    { _T("oline"), 8254 }, // overline
    { _T("frasl"), 8260 }, // fraction slash
    { _T("euro"), 8364 }, // euro sign

    // --- letterlike symbols ---
    { _T("image"), 8465 }, // black-letter capital I (imaginary part)
    { _T("weierp"), 8472 }, // script capital P (Weierstrass p)
    { _T("real"), 8476 }, // black-letter capital R (real part)
    { _T("trade"), 8482 }, // trade mark sign
    { _T("alefsym"), 8501 }, // alef symbol

    // --- arrows ---
    { _T("larr"), 8592 }, // leftwards arrow
    { _T("uarr"), 8593 }, // upwards arrow
    { _T("rarr"), 8594 }, // rightwards arrow
    { _T("darr"), 8595 }, // downwards arrow
    { _T("harr"), 8596 }, // left right arrow
    { _T("crarr"), 8629 }, // downwards arrow with corner leftwards (carriage return)
    { _T("lArr"), 8656 }, // leftwards double arrow
    { _T("uArr"), 8657 }, // upwards double arrow
    { _T("rArr"), 8658 }, // rightwards double arrow
    { _T("dArr"), 8659 }, // downwards double arrow
    { _T("hArr"), 8660 }, // left right double arrow

    // --- mathematical operators ---
    { _T("forall"), 8704 }, // for all
    { _T("part"), 8706 }, // partial differential
    { _T("exist"), 8707 }, // there exists
    { _T("empty"), 8709 }, // empty set
    { _T("nabla"), 8711 }, // nabla
    { _T("isin"), 8712 }, // element of
    { _T("notin"), 8713 }, // not an element of
    { _T("ni"), 8715 }, // contains as member
    { _T("prod"), 8719 }, // n-ary product
    { _T("sum"), 8721 }, // n-ary summation
    { _T("minus"), 8722 }, // minus sign
    { _T("lowast"), 8727 }, // asterisk operator
    { _T("radic"), 8730 }, // square root
    { _T("prop"), 8733 }, // proportional to
    { _T("infin"), 8734 }, // infinity
    { _T("ang"), 8736 }, // angle
    { _T("and"), 8743 }, // logical and
    { _T("or"), 8744 }, // logical or
    { _T("cap"), 8745 }, // intersection
    { _T("cup"), 8746 }, // union
    { _T("int"), 8747 }, // integral
    { _T("there4"), 8756 }, // therefore
    { _T("sim"), 8764 }, // tilde operator
    { _T("cong"), 8773 }, // approximately equal to
    { _T("asymp"), 8776 }, // almost equal to
    { _T("ne"), 8800 }, // not equal to
    { _T("equiv"), 8801 }, // identical to
    { _T("le"), 8804 }, // less-than or equal to
    { _T("ge"), 8805 }, // greater-than or equal to
    { _T("sub"), 8834 }, // subset of
    { _T("sup"), 8835 }, // superset of
    { _T("nsub"), 8836 }, // not a subset of
    { _T("sube"), 8838 }, // subset of or equal to
    { _T("supe"), 8839 }, // superset of or equal to
    { _T("oplus"), 8853 }, // circled plus
    { _T("otimes"), 8855 }, // circled times
    { _T("perp"), 8869 }, // up tack (perpendicular)
    { _T("sdot"), 8901 }, // dot operator

    // --- technical and geometric symbols, card suits ---
    { _T("lceil"), 8968 }, // left ceiling
    { _T("rceil"), 8969 }, // right ceiling
    { _T("lfloor"), 8970 }, // left floor
    { _T("rfloor"), 8971 }, // right floor
    { _T("lang"), 9001 }, // left-pointing angle bracket
    { _T("rang"), 9002 }, // right-pointing angle bracket
    { _T("loz"), 9674 }, // lozenge
    { _T("spades"), 9824 }, // black spade suit
    { _T("clubs"), 9827 }, // black club suit
    { _T("hearts"), 9829 }, // black heart suit
    { _T("diams"), 9830 } // black diamond suit
};