Page Speed Optimization Libraries
1.5.27.2
|
00001 /* 00002 * Copyright 2010 Google Inc. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http:///www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 00018 00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00021 00022 #include <algorithm> 00023 #include <vector> 00024 #include "net/instaweb/util/public/basictypes.h" 00025 #include "net/instaweb/htmlparse/public/html_name.h" 00026 #include "net/instaweb/util/public/string.h" 00027 #include "net/instaweb/util/public/string_hash.h" 00028 #include "net/instaweb/util/public/string_util.h" 00029 #include "pagespeed/kernel/util/sparse_hash_map.h" 00030 00031 namespace net_instaweb { 00032 00036 class HtmlKeywords { 00037 public: 00042 static void Init(); 00043 00046 static void ShutDown(); 00047 00049 static const char* KeywordToString(HtmlName::Keyword keyword) { 00050 return singleton_->keyword_vector_[keyword]; 00051 } 00052 00055 static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) { 00056 return singleton_->EscapeHelper(unescaped, buf); 00057 } 00058 00066 static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf, 00067 bool* decoding_error) { 00068 return singleton_->UnescapeHelper(escaped, buf, decoding_error); 00069 } 00070 00081 00085 static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00086 return std::binary_search(singleton_->auto_close_.begin(), 00087 singleton_->auto_close_.end(), 00088 MakeKeywordPair(k1, k2)); 00089 } 00090 00094 static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00095 return std::binary_search(singleton_->contained_.begin(), 00096 singleton_->contained_.end(), 00097 MakeKeywordPair(k1, k2)); 00098 } 00099 00105 static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) { 00106 return std::binary_search(singleton_->optionally_closed_.begin(), 00107 singleton_->optionally_closed_.end(), 00108 keyword); 00109 } 00110 00111 private: 00112 typedef int32 KeywordPair; 00113 typedef std::vector<KeywordPair> KeywordPairVec; 00114 typedef std::vector<HtmlName::Keyword> KeywordVec; 00115 00116 HtmlKeywords(); 00117 const char* UnescapeAttributeValue(); 00118 void InitEscapeSequences(); 00119 void InitAutoClose(); 00120 void InitContains(); 00121 void InitOptionallyClosedKeywords(); 00122 00140 bool TryUnescape(bool accumulate_numeric_code, 00141 uint32 numeric_value, 00142 const GoogleString& escape, 00143 bool was_terminated, 00144 GoogleString* buf) const; 00145 00147 static KeywordPair MakeKeywordPair(HtmlName::Keyword k1, 00148 HtmlName::Keyword k2) { 00149 return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2); 00150 } 00151 00156 void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list, 00157 KeywordPairVec* kmap); 00158 void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) { 00159 AddCrossProduct(k1_list, k2_list, &auto_close_); 00160 } 00161 void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) { 00162 AddCrossProduct(k1_list, k2_list, &contained_); 00163 } 00164 00166 void AddToSet(const StringPiece& klist, KeywordVec* kset); 00167 00168 static HtmlKeywords* singleton_; 00169 00170 StringPiece EscapeHelper(const StringPiece& unescaped, 00171 GoogleString* buf) const; 00172 StringPiece UnescapeHelper(const StringPiece& escaped, 00173 GoogleString* buf, 00174 bool* decoding_error) const; 00175 00181 typedef sparse_hash_map< 00182 GoogleString, const char*, 00183 CaseFoldStringHash, 00184 CaseFoldStringEqual> StringStringSparseHashMapInsensitive; 00185 typedef sparse_hash_map< 00186 GoogleString, const char*, 00187 CasePreserveStringHash> StringStringSparseHashMapSensitive; 00188 00189 StringStringSparseHashMapInsensitive unescape_insensitive_map_; 00190 StringStringSparseHashMapSensitive unescape_sensitive_map_; 00191 StringStringSparseHashMapSensitive escape_map_; 00192 00193 CharStarVector keyword_vector_; 00194 00197 KeywordPairVec auto_close_; 00198 KeywordPairVec contained_; 00199 KeywordVec optionally_closed_; 00200 00201 DISALLOW_COPY_AND_ASSIGN(HtmlKeywords); 00202 }; 00203 00204 } 00205 00206 #endif ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_