Current Path: > > opt > alt > libicu65 > usr > > include > > unicode
Operation : Linux premium131.web-hosting.com 4.18.0-553.44.1.lve.el8.x86_64 #1 SMP Thu Mar 13 14:29:12 UTC 2025 x86_64 Software : Apache Server IP : 162.0.232.56 | Your IP: 216.73.216.111 Domains : 1034 Domain(s) Permission : [ 0755 ]
Name | Type | Size | Last Modified | Actions |
---|---|---|---|---|
alphaindex.h | File | 27118 bytes | February 08 2022 16:22:58. | |
appendable.h | File | 8694 bytes | February 08 2022 16:22:58. | |
basictz.h | File | 9365 bytes | February 08 2022 16:22:58. | |
brkiter.h | File | 28464 bytes | February 08 2022 16:22:58. | |
bytestream.h | File | 9829 bytes | February 08 2022 16:22:58. | |
bytestrie.h | File | 21266 bytes | February 08 2022 16:22:58. | |
bytestriebuilder.h | File | 7248 bytes | February 08 2022 16:22:58. | |
calendar.h | File | 108278 bytes | February 08 2022 16:22:58. | |
caniter.h | File | 7611 bytes | February 08 2022 16:22:58. | |
casemap.h | File | 25933 bytes | February 08 2022 16:22:58. | |
char16ptr.h | File | 7393 bytes | February 08 2022 16:22:58. | |
chariter.h | File | 24632 bytes | February 08 2022 16:22:58. | |
choicfmt.h | File | 24482 bytes | February 08 2022 16:22:58. | |
coleitr.h | File | 14094 bytes | February 08 2022 16:22:58. | |
coll.h | File | 57581 bytes | February 08 2022 16:22:58. | |
compactdecimalformat.h | File | 7047 bytes | February 08 2022 16:22:58. | |
curramt.h | File | 3851 bytes | February 08 2022 16:22:58. | |
currpinf.h | File | 7473 bytes | February 08 2022 16:22:58. | |
currunit.h | File | 4149 bytes | February 08 2022 16:22:58. | |
datefmt.h | File | 41641 bytes | February 08 2022 16:22:58. | |
dbbi.h | File | 1223 bytes | February 08 2022 16:22:58. | |
dcfmtsym.h | File | 20611 bytes | February 08 2022 16:22:59. | |
decimfmt.h | File | 89480 bytes | February 08 2022 16:22:59. | |
docmain.h | File | 7142 bytes | February 08 2022 16:22:58. | |
dtfmtsym.h | File | 38609 bytes | February 08 2022 16:22:59. | |
dtintrv.h | File | 3930 bytes | February 08 2022 16:22:58. | |
dtitvfmt.h | File | 47745 bytes | February 08 2022 16:22:59. | |
dtitvinf.h | File | 18959 bytes | February 08 2022 16:22:59. | |
dtptngen.h | File | 25684 bytes | February 08 2022 16:22:59. | |
dtrule.h | File | 8888 bytes | February 08 2022 16:22:59. | |
edits.h | File | 21237 bytes | February 08 2022 16:22:58. | |
enumset.h | File | 2130 bytes | February 08 2022 16:22:58. | |
errorcode.h | File | 4956 bytes | February 08 2022 16:22:58. | |
fieldpos.h | File | 8899 bytes | February 08 2022 16:22:59. | |
filteredbrk.h | File | 5501 bytes | February 08 2022 16:22:58. | |
fmtable.h | File | 25007 bytes | February 08 2022 16:22:59. | |
format.h | File | 12802 bytes | February 08 2022 16:22:59. | |
formattedvalue.h | File | 10521 bytes | February 08 2022 16:22:59. | |
fpositer.h | File | 3109 bytes | February 08 2022 16:22:59. | |
gender.h | File | 3408 bytes | February 08 2022 16:22:59. | |
gregocal.h | File | 32472 bytes | February 08 2022 16:22:59. | |
icudataver.h | File | 1051 bytes | February 08 2022 16:22:58. | |
icuplug.h | File | 12166 bytes | February 08 2022 16:22:58. | |
idna.h | File | 13000 bytes | February 08 2022 16:22:58. | |
listformatter.h | File | 9701 bytes | February 08 2022 16:22:59. | |
localebuilder.h | File | 11540 bytes | February 08 2022 16:22:58. | |
localematcher.h | File | 23043 bytes | February 08 2022 16:22:58. | |
localpointer.h | File | 20159 bytes | February 08 2022 16:22:58. | |
locdspnm.h | File | 7292 bytes | February 08 2022 16:22:58. | |
locid.h | File | 48536 bytes | February 08 2022 16:22:58. | |
measfmt.h | File | 11603 bytes | February 08 2022 16:22:59. | |
measunit.h | File | 95554 bytes | February 08 2022 16:22:59. | |
measure.h | File | 4423 bytes | February 08 2022 16:22:59. | |
messagepattern.h | File | 34521 bytes | February 08 2022 16:22:58. | |
msgfmt.h | File | 45168 bytes | February 08 2022 16:22:59. | |
normalizer2.h | File | 34851 bytes | February 08 2022 16:22:58. | |
normlzr.h | File | 31682 bytes | February 08 2022 16:22:58. | |
nounit.h | File | 2754 bytes | February 08 2022 16:22:59. | |
numberformatter.h | File | 88383 bytes | February 08 2022 16:22:59. | |
numberrangeformatter.h | File | 30865 bytes | February 08 2022 16:22:59. | |
numfmt.h | File | 51001 bytes | February 08 2022 16:22:59. | |
numsys.h | File | 7364 bytes | February 08 2022 16:22:59. | |
parseerr.h | File | 3155 bytes | February 08 2022 16:22:58. | |
parsepos.h | File | 5692 bytes | February 08 2022 16:22:58. | |
platform.h | File | 28752 bytes | February 08 2022 16:22:58. | |
plurfmt.h | File | 25800 bytes | February 08 2022 16:22:59. | |
plurrule.h | File | 18835 bytes | February 08 2022 16:22:59. | |
ptypes.h | File | 3577 bytes | February 08 2022 16:22:58. | |
putil.h | File | 6487 bytes | February 08 2022 16:22:58. | |
rbbi.h | File | 27218 bytes | February 08 2022 16:22:58. | |
rbnf.h | File | 49899 bytes | February 08 2022 16:22:59. | |
rbtz.h | File | 15979 bytes | February 08 2022 16:22:59. | |
regex.h | File | 86382 bytes | February 08 2022 16:22:59. | |
region.h | File | 9404 bytes | February 08 2022 16:22:59. | |
reldatefmt.h | File | 23159 bytes | February 08 2022 16:22:59. | |
rep.h | File | 9599 bytes | February 08 2022 16:22:58. | |
resbund.h | File | 18503 bytes | February 08 2022 16:22:58. | |
schriter.h | File | 6475 bytes | February 08 2022 16:22:58. | |
scientificnumberformatter.h | File | 6553 bytes | February 08 2022 16:22:59. | |
search.h | File | 22757 bytes | February 08 2022 16:22:59. | |
selfmt.h | File | 14643 bytes | February 08 2022 16:22:59. | |
simpleformatter.h | File | 12888 bytes | February 08 2022 16:22:58. | |
simpletz.h | File | 46527 bytes | February 08 2022 16:22:59. | |
smpdtfmt.h | File | 72670 bytes | February 08 2022 16:22:59. | |
sortkey.h | File | 11444 bytes | February 08 2022 16:22:59. | |
std_string.h | File | 1076 bytes | February 08 2022 16:22:58. | |
strenum.h | File | 10158 bytes | February 08 2022 16:22:58. | |
stringoptions.h | File | 5926 bytes | February 08 2022 16:22:58. | |
stringpiece.h | File | 7556 bytes | February 08 2022 16:22:58. | |
stringtriebuilder.h | File | 15698 bytes | February 08 2022 16:22:58. | |
stsearch.h | File | 21810 bytes | February 08 2022 16:22:59. | |
symtable.h | File | 4374 bytes | February 08 2022 16:22:58. | |
tblcoll.h | File | 37489 bytes | February 08 2022 16:22:59. | |
timezone.h | File | 42005 bytes | February 08 2022 16:22:59. | |
tmunit.h | File | 3461 bytes | February 08 2022 16:22:59. | |
tmutamt.h | File | 5015 bytes | February 08 2022 16:22:59. | |
tmutfmt.h | File | 8042 bytes | February 08 2022 16:22:59. | |
translit.h | File | 67400 bytes | February 08 2022 16:22:59. | |
tzfmt.h | File | 43916 bytes | February 08 2022 16:22:59. | |
tznames.h | File | 17252 bytes | February 08 2022 16:22:59. | |
tzrule.h | File | 36215 bytes | February 08 2022 16:22:59. | |
tztrans.h | File | 6271 bytes | February 08 2022 16:22:59. | |
ubidi.h | File | 91711 bytes | February 08 2022 16:22:58. | |
ubiditransform.h | File | 12950 bytes | February 08 2022 16:22:58. | |
ubrk.h | File | 24547 bytes | February 08 2022 16:22:58. | |
ucal.h | File | 58265 bytes | February 08 2022 16:22:59. | |
ucasemap.h | File | 15546 bytes | February 08 2022 16:22:58. | |
ucat.h | File | 5484 bytes | February 08 2022 16:22:58. | |
uchar.h | File | 143936 bytes | February 08 2022 16:22:58. | |
ucharstrie.h | File | 23120 bytes | February 08 2022 16:22:58. | |
ucharstriebuilder.h | File | 7378 bytes | February 08 2022 16:22:58. | |
uchriter.h | File | 13521 bytes | February 08 2022 16:22:58. | |
uclean.h | File | 11474 bytes | February 08 2022 16:22:58. | |
ucnv.h | File | 85085 bytes | February 08 2022 16:22:58. | |
ucnv_cb.h | File | 6748 bytes | February 08 2022 16:22:58. | |
ucnv_err.h | File | 21492 bytes | February 08 2022 16:22:58. | |
ucnvsel.h | File | 6283 bytes | February 08 2022 16:22:58. | |
ucol.h | File | 62939 bytes | February 08 2022 16:22:59. | |
ucoleitr.h | File | 9684 bytes | February 08 2022 16:22:59. | |
uconfig.h | File | 12356 bytes | February 08 2022 16:22:58. | |
ucpmap.h | File | 5663 bytes | February 08 2022 16:22:58. | |
ucptrie.h | File | 23002 bytes | February 08 2022 16:22:58. | |
ucsdet.h | File | 15018 bytes | February 08 2022 16:22:59. | |
ucurr.h | File | 16507 bytes | February 08 2022 16:22:58. | |
udat.h | File | 62342 bytes | February 08 2022 16:22:59. | |
udata.h | File | 15930 bytes | February 08 2022 16:22:58. | |
udateintervalformat.h | File | 10272 bytes | February 08 2022 16:22:59. | |
udatpg.h | File | 26639 bytes | February 08 2022 16:22:59. | |
udisplaycontext.h | File | 6029 bytes | February 08 2022 16:22:58. | |
uenum.h | File | 7970 bytes | February 08 2022 16:22:58. | |
ufieldpositer.h | File | 4461 bytes | February 08 2022 16:22:59. | |
uformattable.h | File | 11198 bytes | February 08 2022 16:22:59. | |
uformattedvalue.h | File | 12431 bytes | February 08 2022 16:22:59. | |
ugender.h | File | 2052 bytes | February 08 2022 16:22:59. | |
uidna.h | File | 34169 bytes | February 08 2022 16:22:58. | |
uiter.h | File | 23319 bytes | February 08 2022 16:22:58. | |
uldnames.h | File | 10702 bytes | February 08 2022 16:22:58. | |
ulistformatter.h | File | 9047 bytes | February 08 2022 16:22:59. | |
uloc.h | File | 53805 bytes | February 08 2022 16:22:58. | |
ulocdata.h | File | 11533 bytes | February 08 2022 16:22:59. | |
umachine.h | File | 14877 bytes | February 08 2022 16:22:58. | |
umisc.h | File | 1365 bytes | February 08 2022 16:22:58. | |
umsg.h | File | 24812 bytes | February 08 2022 16:22:59. | |
umutablecptrie.h | File | 8435 bytes | February 08 2022 16:22:58. | |
unifilt.h | File | 4055 bytes | February 08 2022 16:22:58. | |
unifunct.h | File | 4141 bytes | February 08 2022 16:22:58. | |
unimatch.h | File | 6244 bytes | February 08 2022 16:22:58. | |
unirepl.h | File | 3464 bytes | February 08 2022 16:22:59. | |
uniset.h | File | 66459 bytes | February 08 2022 16:22:58. | |
unistr.h | File | 174523 bytes | February 08 2022 16:22:58. | |
unorm.h | File | 21015 bytes | February 08 2022 16:22:58. | |
unorm2.h | File | 25254 bytes | February 08 2022 16:22:58. | |
unum.h | File | 54906 bytes | February 08 2022 16:22:59. | |
unumberformatter.h | File | 25972 bytes | February 08 2022 16:22:59. | |
unumsys.h | File | 7387 bytes | February 08 2022 16:22:59. | |
uobject.h | File | 10936 bytes | February 08 2022 16:22:58. | |
upluralrules.h | File | 8068 bytes | February 08 2022 16:22:59. | |
uregex.h | File | 73784 bytes | February 08 2022 16:22:59. | |
uregion.h | File | 10073 bytes | February 08 2022 16:22:59. | |
ureldatefmt.h | File | 17670 bytes | February 08 2022 16:22:59. | |
urename.h | File | 134109 bytes | February 08 2022 16:22:58. | |
urep.h | File | 5507 bytes | February 08 2022 16:22:58. | |
ures.h | File | 37415 bytes | February 08 2022 16:22:58. | |
uscript.h | File | 27510 bytes | February 08 2022 16:22:58. | |
usearch.h | File | 39039 bytes | February 08 2022 16:22:59. | |
uset.h | File | 40958 bytes | February 08 2022 16:22:58. | |
usetiter.h | File | 9781 bytes | February 08 2022 16:22:58. | |
ushape.h | File | 18432 bytes | February 08 2022 16:22:58. | |
uspoof.h | File | 67480 bytes | February 08 2022 16:22:59. | |
usprep.h | File | 8331 bytes | February 08 2022 16:22:58. | |
ustdio.h | File | 39469 bytes | February 08 2022 16:22:59. | |
ustream.h | File | 1934 bytes | February 08 2022 16:22:59. | |
ustring.h | File | 74211 bytes | February 08 2022 16:22:58. | |
ustringtrie.h | File | 3224 bytes | February 08 2022 16:22:58. | |
utext.h | File | 59527 bytes | February 08 2022 16:22:58. | |
utf.h | File | 8046 bytes | February 08 2022 16:22:58. | |
utf16.h | File | 23878 bytes | February 08 2022 16:22:58. | |
utf32.h | File | 763 bytes | February 08 2022 16:22:58. | |
utf8.h | File | 31700 bytes | February 08 2022 16:22:58. | |
utf_old.h | File | 46929 bytes | February 08 2022 16:22:58. | |
utmscale.h | File | 14113 bytes | February 08 2022 16:22:59. | |
utrace.h | File | 16112 bytes | February 08 2022 16:22:58. | |
utrans.h | File | 26130 bytes | February 08 2022 16:22:59. | |
utypes.h | File | 31481 bytes | February 08 2022 16:22:58. | |
uvernum.h | File | 6832 bytes | February 08 2022 16:22:58. | |
uversion.h | File | 6145 bytes | February 08 2022 16:22:58. | |
vtzone.h | File | 20784 bytes | February 08 2022 16:22:59. |
// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucsdet.h * encoding: UTF-8 * indentation:4 * * created on: 2005Aug04 * created by: Andy Heninger * * ICU Character Set Detection, API for C * * Draft version 18 Oct 2005 * */ #ifndef __UCSDET_H #define __UCSDET_H #include "unicode/utypes.h" #if !UCONFIG_NO_CONVERSION #include "unicode/localpointer.h" #include "unicode/uenum.h" /** * \file * \brief C API: Charset Detection API * * This API provides a facility for detecting the * charset or encoding of character data in an unknown text format. * The input data can be from an array of bytes. * <p> * Character set detection is at best an imprecise operation. The detection * process will attempt to identify the charset that best matches the characteristics * of the byte data, but the process is partly statistical in nature, and * the results can not be guaranteed to always be correct. * <p> * For best accuracy in charset detection, the input data should be primarily * in a single language, and a minimum of a few hundred bytes worth of plain text * in the language are needed. The detection process will attempt to * ignore html or xml style markup that could otherwise obscure the content. * <p> * An alternative to the ICU Charset Detector is the * Compact Encoding Detector, https://github.com/google/compact_enc_det. * It often gives more accurate results, especially with short input samples. 
*/ struct UCharsetDetector; /** * Structure representing a charset detector * @stable ICU 3.6 */ typedef struct UCharsetDetector UCharsetDetector; struct UCharsetMatch; /** * Opaque structure representing a match that was identified * from a charset detection operation. * @stable ICU 3.6 */ typedef struct UCharsetMatch UCharsetMatch; /** * Open a charset detector. * * @param status Any error conditions occurring during the open * operation are reported back in this variable. * @return the newly opened charset detector. * @stable ICU 3.6 */ U_STABLE UCharsetDetector * U_EXPORT2 ucsdet_open(UErrorCode *status); /** * Close a charset detector. All storage and any other resources * owned by this charset detector will be released. Failure to * close a charset detector when finished with it can result in * memory leaks in the application. * * @param ucsd The charset detector to be closed. * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_close(UCharsetDetector *ucsd); #if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN /** * \class LocalUCharsetDetectorPointer * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close(). * For most methods see the LocalPointerBase base class. * * @see LocalPointerBase * @see LocalPointer * @stable ICU 4.4 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close); U_NAMESPACE_END #endif /** * Set the input byte data whose charset is to be detected. * * Ownership of the input text byte array remains with the caller. * The input string must not be altered or deleted until the charset * detector is either closed or reset to refer to different input text. * * @param ucsd the charset detector to be used. * @param textIn the input text of unknown encoding. * @param len the length of the input text, or -1 if the text * is NUL terminated. * @param status any error conditions are reported back in this variable. 
* * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status); /** Set the declared encoding for charset detection. * The declared encoding of an input text is an encoding obtained * by the user from an http header or xml declaration or similar source that * can be provided as an additional hint to the charset detector. * * How and whether the declared encoding will be used during the * detection process is TBD. * * @param ucsd the charset detector to be used. * @param encoding an encoding for the current data obtained from * a header or declaration or other source outside * of the byte data itself. * @param length the length of the encoding name, or -1 if the name string * is NUL terminated. * @param status any error conditions are reported back in this variable. * * @stable ICU 3.6 */ U_STABLE void U_EXPORT2 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status); /** * Return the charset that best matches the supplied input data. * * Note though, that because the detection * only looks at the start of the input data, * there is a possibility that the returned charset will fail to handle * the full set of input data. * <p> * The returned UCharsetMatch object is owned by the UCharsetDetector. * It will remain valid until the detector input is reset, or until * the detector is closed. * <p> * The function will fail if * <ul> * <li>no charset appears to match the data.</li> * <li>no input text has been provided</li> * </ul> * * @param ucsd the charset detector to be used. * @param status any error conditions are reported back in this variable. * @return a UCharsetMatch representing the best matching charset, * or NULL if no charset matches the byte data. 
* * @stable ICU 3.6 */ U_STABLE const UCharsetMatch * U_EXPORT2 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status); /** * Find all charset matches that appear to be consistent with the input, * returning an array of results. The results are ordered with the * best quality match first. * * Because the detection only looks at a limited amount of the * input byte data, some of the returned charsets may fail to handle * all of the input data. * <p> * The returned UCharsetMatch objects are owned by the UCharsetDetector. * They will remain valid until the detector is closed or modified. * * <p> * Return an error if * <ul> * <li>no charsets appear to match the input data.</li> * <li>no input text has been provided</li> * </ul> * * @param ucsd the charset detector to be used. * @param matchesFound pointer to a variable that will be set to the * number of charsets identified that are consistent with * the input data. Output only. * @param status any error conditions are reported back in this variable. * @return A pointer to an array of pointers to UCharsetMatch objects. * This array, and the UCharsetMatch instances to which it refers, * are owned by the UCharsetDetector, and will remain valid until * the detector is closed or modified. * @stable ICU 3.6 */ U_STABLE const UCharsetMatch ** U_EXPORT2 ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status); /** * Get the name of the charset represented by a UCharsetMatch. * * The storage for the returned name string is owned by the * UCharsetMatch, and will remain valid while the UCharsetMatch * is valid. * * The name returned is suitable for use with the ICU conversion APIs. * * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return The name of the matching charset. 
* * @stable ICU 3.6 */ U_STABLE const char * U_EXPORT2 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get a confidence number for the quality of the match of the byte * data with the charset. Confidence numbers range from zero to 100, * with 100 representing complete confidence and zero representing * no confidence. * * The confidence values are somewhat arbitrary. They define an * ordering within the results for any single detection operation * but are not generally comparable between the results for different input. * * A confidence value of ten does have a general meaning - it is used * for charsets that can represent the input data, but for which there * is no other indication that suggests that the charset is the correct one. * Pure 7 bit ASCII data, for example, is compatible with a * great many charsets, most of which will appear as possible matches * with a confidence of 10. * * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return A confidence number for the charset match. * * @stable ICU 3.6 */ U_STABLE int32_t U_EXPORT2 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get the RFC 3066 code for the language of the input data. * * The Charset Detection service is intended primarily for detecting * charsets, not language. For some, but not all, charsets, a language is * identified as a byproduct of the detection process, and that is what * is returned by this function. * * CAUTION: * 1. Language information is not available for input data encoded in * all charsets. In particular, no language is identified * for UTF-8 input data. * * 2. Closely related languages may sometimes be confused. * * If more accurate language detection is required, a linguistic * analysis package should be used. * * The storage for the returned name string is owned by the * UCharsetMatch, and will remain valid while the UCharsetMatch * is valid. 
* * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return The RFC 3066 code for the language of the input data, or * an empty string if the language could not be determined. * * @stable ICU 3.6 */ U_STABLE const char * U_EXPORT2 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get the entire input text as a UChar string, placing it into * a caller-supplied buffer. A terminating * NUL character will be appended to the buffer if space is available. * * The number of UChars in the output string, not including the terminating * NUL, is returned. * * If the supplied buffer is smaller than required to hold the output, * the contents of the buffer are undefined. The full output string length * (in UChars) is returned as always, and can be used to allocate a buffer * of the correct size. * * * @param ucsm The charset match object. * @param buf A UChar buffer to be filled with the converted text data. * @param cap The capacity of the buffer in UChars. * @param status Any error conditions are reported back in this variable. * @return The number of UChars in the output string. * * @stable ICU 3.6 */ U_STABLE int32_t U_EXPORT2 ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status); /** * Get an iterator over the set of all detectable charsets - * over the charsets that are known to the charset detection * service. * * The returned UEnumeration provides access to the names of * the charsets. * * <p> * The state of the Charset detector that is passed in does not * affect the result of this function, but requiring a valid, open * charset detector as a parameter ensures that the charset detection * service has been safely initialized and that the required detection * data is available. * * <p> * <b>Note:</b> Multiple different charset encodings in the same family may use * a single shared name in this implementation. 
For example, this method returns * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252" * (Windows Latin 1). However, actual detection result could be "windows-1252" * when the input data matches Latin 1 code points with any points only available * in "windows-1252". * * @param ucsd a Charset detector. * @param status Any error conditions are reported back in this variable. * @return an iterator providing access to the detectable charset names. * @stable ICU 3.6 */ U_STABLE UEnumeration * U_EXPORT2 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); /** * Test whether input filtering is enabled for this charset detector. * Input filtering removes text that appears to be HTML or xml * markup from the input before applying the code page detection * heuristics. * * @param ucsd The charset detector to check. * @return TRUE if filtering is enabled. * @stable ICU 3.6 */ U_STABLE UBool U_EXPORT2 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd); /** * Enable filtering of input text. If filtering is enabled, * text within angle brackets ("<" and ">") will be removed * before detection, which will remove most HTML or xml markup. * * @param ucsd the charset detector to be modified. * @param filter <code>true</code> to enable input text filtering. * @return The previous setting. * * @stable ICU 3.6 */ U_STABLE UBool U_EXPORT2 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter); #ifndef U_HIDE_INTERNAL_API /** * Get an iterator over the set of detectable charsets - * over the charsets that are enabled by the specified charset detector. * * The returned UEnumeration provides access to the names of * the charsets. * * @param ucsd a Charset detector. * @param status Any error conditions are reported back in this variable. * @return an iterator providing access to the detectable charset names by * the specified charset detector. 
* @internal */ U_INTERNAL UEnumeration * U_EXPORT2 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); /** * Enable or disable an individual charset encoding. * The name of the charset encoding must be included in the names returned by * {@link #ucsdet_getAllDetectableCharsets()}. * * @param ucsd a Charset detector. * @param encoding the name of the charset encoding. * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the * charset encoding. * @param status receives the return status. When the name of charset encoding * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set. * @internal */ U_INTERNAL void U_EXPORT2 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status); #endif /* U_HIDE_INTERNAL_API */ #endif #endif /* __UCSDET_H */
SILENT KILLER Tool