Page Speed Optimization Libraries
1.3.25.1
|
00001 /* 00002 * Copyright 2010 Google Inc. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http:///www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 00018 00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_ 00021 00022 #include <algorithm> 00023 #include <map> 00024 #include <vector> 00025 #include "net/instaweb/util/public/basictypes.h" 00026 #include "net/instaweb/htmlparse/public/html_name.h" 00027 #include "net/instaweb/util/public/string.h" 00028 #include "net/instaweb/util/public/string_util.h" 00029 00030 namespace net_instaweb { 00031 00032 class HtmlKeywords { 00033 public: 00038 static void Init(); 00039 00042 static void ShutDown(); 00043 00045 static const char* KeywordToString(HtmlName::Keyword keyword) { 00046 return singleton_->keyword_vector_[keyword]; 00047 } 00048 00051 static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) { 00052 return singleton_->EscapeHelper(unescaped, buf); 00053 } 00054 00062 static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf, 00063 bool* decoding_error) { 00064 return singleton_->UnescapeHelper(escaped, buf, decoding_error); 00065 } 00066 00077 00081 static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00082 return std::binary_search(singleton_->auto_close_.begin(), 00083 singleton_->auto_close_.end(), 00084 MakeKeywordPair(k1, k2)); 00085 } 00086 00090 static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) { 00091 return std::binary_search(singleton_->contained_.begin(), 00092 singleton_->contained_.end(), 00093 MakeKeywordPair(k1, k2)); 00094 } 00095 00101 static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) { 00102 return std::binary_search(singleton_->optionally_closed_.begin(), 00103 singleton_->optionally_closed_.end(), 00104 keyword); 00105 } 00106 00107 private: 00108 typedef int32 KeywordPair; 00109 typedef std::vector<KeywordPair> KeywordPairVec; 00110 typedef std::vector<HtmlName::Keyword> KeywordVec; 00111 00112 HtmlKeywords(); 00113 const char* UnescapeAttributeValue(); 00114 void InitEscapeSequences(); 00115 void InitAutoClose(); 00116 void InitContains(); 00117 void InitOptionallyClosedKeywords(); 00118 00136 bool TryUnescape(bool accumulate_numeric_code, 00137 uint32 numeric_value, 00138 const GoogleString& escape, 00139 bool was_terminated, 00140 GoogleString* buf) const; 00141 00143 static KeywordPair MakeKeywordPair(HtmlName::Keyword k1, 00144 HtmlName::Keyword k2) { 00145 return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2); 00146 } 00147 00152 void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list, 00153 KeywordPairVec* kmap); 00154 void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) { 00155 AddCrossProduct(k1_list, k2_list, &auto_close_); 00156 } 00157 void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) { 00158 AddCrossProduct(k1_list, k2_list, &contained_); 00159 } 00160 00162 void AddToSet(const StringPiece& klist, KeywordVec* kset); 00163 00164 static HtmlKeywords* singleton_; 00165 00166 StringPiece EscapeHelper(const StringPiece& unescaped, 00167 GoogleString* buf) const; 00168 StringPiece UnescapeHelper(const StringPiece& escaped, 00169 GoogleString* buf, 00170 bool* decoding_error) const; 00171 00172 typedef std::map<GoogleString, GoogleString, 00173 StringCompareInsensitive> StringStringMapInsensitive; 00174 typedef std::map<GoogleString, GoogleString> StringStringMapSensitive; 00175 StringStringMapInsensitive unescape_insensitive_map_; 00176 StringStringMapSensitive unescape_sensitive_map_; 00177 StringStringMapSensitive escape_map_; 00178 CharStarVector keyword_vector_; 00179 00182 KeywordPairVec auto_close_; 00183 KeywordPairVec contained_; 00184 KeywordVec optionally_closed_; 00185 00186 DISALLOW_COPY_AND_ASSIGN(HtmlKeywords); 00187 }; 00188 00189 } 00190 00191 #endif ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_