Page Speed Optimization Libraries
1.4.26.1
|
00001 /* 00002 * Copyright 2010 Google Inc. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http:///www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 00018 00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00021 00022 #include <algorithm> 00023 #include <vector> 00024 #include "net/instaweb/util/public/basictypes.h" 00025 #include "net/instaweb/htmlparse/public/html_name.h" 00026 #include "net/instaweb/util/public/sparse_hash_map.h" 00027 #include "net/instaweb/util/public/string.h" 00028 #include "net/instaweb/util/public/string_hash.h" 00029 #include "net/instaweb/util/public/string_util.h" 00030 00031 namespace net_instaweb { 00032 00033 class HtmlKeywords { 00034 public: 00039 static void Init(); 00040 00043 static void ShutDown(); 00044 00046 static const char* KeywordToString(HtmlName::Keyword keyword) { 00047 return singleton_->keyword_vector_[keyword]; 00048 } 00049 00052 static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) { 00053 return singleton_->EscapeHelper(unescaped, buf); 00054 } 00055 00063 static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf, 00064 bool* decoding_error) { 00065 return singleton_->UnescapeHelper(escaped, buf, decoding_error); 00066 } 00067 00078 00082 static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00083 return std::binary_search(singleton_->auto_close_.begin(), 00084 singleton_->auto_close_.end(), 00085 MakeKeywordPair(k1, k2)); 00086 } 00087 00091 static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00092 return std::binary_search(singleton_->contained_.begin(), 00093 singleton_->contained_.end(), 00094 MakeKeywordPair(k1, k2)); 00095 } 00096 00102 static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) { 00103 return std::binary_search(singleton_->optionally_closed_.begin(), 00104 singleton_->optionally_closed_.end(), 00105 keyword); 00106 } 00107 00108 private: 00109 typedef int32 KeywordPair; 00110 typedef std::vector<KeywordPair> KeywordPairVec; 00111 typedef std::vector<HtmlName::Keyword> KeywordVec; 00112 00113 HtmlKeywords(); 00114 const char* UnescapeAttributeValue(); 00115 void InitEscapeSequences(); 00116 void InitAutoClose(); 00117 void InitContains(); 00118 void InitOptionallyClosedKeywords(); 00119 00137 bool TryUnescape(bool accumulate_numeric_code, 00138 uint32 numeric_value, 00139 const GoogleString& escape, 00140 bool was_terminated, 00141 GoogleString* buf) const; 00142 00144 static KeywordPair MakeKeywordPair(HtmlName::Keyword k1, 00145 HtmlName::Keyword k2) { 00146 return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2); 00147 } 00148 00153 void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list, 00154 KeywordPairVec* kmap); 00155 void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) { 00156 AddCrossProduct(k1_list, k2_list, &auto_close_); 00157 } 00158 void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) { 00159 AddCrossProduct(k1_list, k2_list, &contained_); 00160 } 00161 00163 void AddToSet(const StringPiece& klist, KeywordVec* kset); 00164 00165 static HtmlKeywords* singleton_; 00166 00167 StringPiece EscapeHelper(const StringPiece& unescaped, 00168 GoogleString* buf) const; 00169 StringPiece UnescapeHelper(const StringPiece& escaped, 00170 GoogleString* buf, 00171 bool* decoding_error) const; 00172 00178 typedef sparse_hash_map< 00179 GoogleString, const char*, 00180 CaseFoldStringHash, 00181 CaseFoldStringEqual> StringStringSparseHashMapInsensitive; 00182 typedef sparse_hash_map< 00183 GoogleString, const char*, 00184 CasePreserveStringHash> StringStringSparseHashMapSensitive; 00185 00186 StringStringSparseHashMapInsensitive unescape_insensitive_map_; 00187 StringStringSparseHashMapSensitive unescape_sensitive_map_; 00188 StringStringSparseHashMapSensitive escape_map_; 00189 00190 CharStarVector keyword_vector_; 00191 00194 KeywordPairVec auto_close_; 00195 KeywordPairVec contained_; 00196 KeywordVec optionally_closed_; 00197 00198 DISALLOW_COPY_AND_ASSIGN(HtmlKeywords); 00199 }; 00200 00201 } 00202 00203 #endif ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_