00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00018
00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00021
00022 #include <algorithm>
00023 #include <map>
00024 #include <vector>
00025 #include "net/instaweb/util/public/basictypes.h"
00026 #include "net/instaweb/htmlparse/public/html_name.h"
00027 #include "net/instaweb/util/public/string.h"
00028 #include "net/instaweb/util/public/string_util.h"
00029
00030 namespace net_instaweb {
00031
00032 class HtmlKeywords {
00033 public:
00038 static void Init();
00039
00042 static void ShutDown();
00043
00045 static const char* KeywordToString(HtmlName::Keyword keyword) {
00046 return singleton_->keyword_vector_[keyword];
00047 }
00048
00051 static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) {
00052 return singleton_->EscapeHelper(unescaped, buf);
00053 }
00054
00062 static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf,
00063 bool* decoding_error) {
00064 return singleton_->UnescapeHelper(escaped, buf, decoding_error);
00065 }
00066
00077
00081 static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00082 return std::binary_search(singleton_->auto_close_.begin(),
00083 singleton_->auto_close_.end(),
00084 MakeKeywordPair(k1, k2));
00085 }
00086
00090 static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00091 return std::binary_search(singleton_->contained_.begin(),
00092 singleton_->contained_.end(),
00093 MakeKeywordPair(k1, k2));
00094 }
00095
00101 static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) {
00102 return std::binary_search(singleton_->optionally_closed_.begin(),
00103 singleton_->optionally_closed_.end(),
00104 keyword);
00105 }
00106
00107 private:
00108 typedef int32 KeywordPair;
00109 typedef std::vector<KeywordPair> KeywordPairVec;
00110 typedef std::vector<HtmlName::Keyword> KeywordVec;
00111
00112 HtmlKeywords();
00113 const char* UnescapeAttributeValue();
00114 void InitEscapeSequences();
00115 void InitAutoClose();
00116 void InitContains();
00117 void InitOptionallyClosedKeywords();
00118
00136 bool TryUnescape(bool accumulate_numeric_code,
00137 uint32 numeric_value,
00138 const GoogleString& escape,
00139 bool was_terminated,
00140 GoogleString* buf) const;
00141
00143 static KeywordPair MakeKeywordPair(HtmlName::Keyword k1,
00144 HtmlName::Keyword k2) {
00145 return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2);
00146 }
00147
00152 void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list,
00153 KeywordPairVec* kmap);
00154 void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) {
00155 AddCrossProduct(k1_list, k2_list, &auto_close_);
00156 }
00157 void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) {
00158 AddCrossProduct(k1_list, k2_list, &contained_);
00159 }
00160
00162 void AddToSet(const StringPiece& klist, KeywordVec* kset);
00163
00164 static HtmlKeywords* singleton_;
00165
00166 StringPiece EscapeHelper(const StringPiece& unescaped,
00167 GoogleString* buf) const;
00168 StringPiece UnescapeHelper(const StringPiece& escaped,
00169 GoogleString* buf,
00170 bool* decoding_error) const;
00171
00172 typedef std::map<GoogleString, GoogleString,
00173 StringCompareInsensitive> StringStringMapInsensitive;
00174 typedef std::map<GoogleString, GoogleString> StringStringMapSensitive;
00175 StringStringMapInsensitive unescape_insensitive_map_;
00176 StringStringMapSensitive unescape_sensitive_map_;
00177 StringStringMapSensitive escape_map_;
00178 CharStarVector keyword_vector_;
00179
00182 KeywordPairVec auto_close_;
00183 KeywordPairVec contained_;
00184 KeywordVec optionally_closed_;
00185
00186 DISALLOW_COPY_AND_ASSIGN(HtmlKeywords);
00187 };
00188
00189 }
00190
00191 #endif ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_