Page Speed Optimization Libraries  1.5.27.2
net/instaweb/htmlparse/public/html_keywords.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Google Inc.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00018 
00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00021 
00022 #include <algorithm>
00023 #include <vector>
00024 #include "net/instaweb/util/public/basictypes.h"
00025 #include "net/instaweb/htmlparse/public/html_name.h"
00026 #include "net/instaweb/util/public/string.h"
00027 #include "net/instaweb/util/public/string_hash.h"
00028 #include "net/instaweb/util/public/string_util.h"
00029 #include "pagespeed/kernel/util/sparse_hash_map.h"
00030 
00031 namespace net_instaweb {
00032 
/// Lookup tables for HTML keywords, escaping and tag-nesting rules, exposed
/// through static accessors that forward to one shared instance (singleton_).
///
/// Every public accessor below dereferences singleton_ without a null check,
/// so the instance must exist before any of them is called.
/// NOTE(review): presumably Init() creates that instance and ShutDown()
/// destroys it -- neither is defined in this header; confirm in the
/// implementation file.
00036 class HtmlKeywords {
00037  public:
  /// NOTE(review): defined out of line; presumably allocates singleton_ and
  /// fills its tables.  Confirm whether repeated calls are allowed.
00042   static void Init();
00043 
  /// NOTE(review): defined out of line; presumably releases whatever Init()
  /// set up.  Confirm.
00046   static void ShutDown();
00047 
  /// Returns the element of keyword_vector_ at index `keyword`.
  /// The lookup uses operator[] directly; no range check is performed here.
00049   static const char* KeywordToString(HtmlName::Keyword keyword) {
00050     return singleton_->keyword_vector_[keyword];
00051   }
00052 
  /// Forwards to EscapeHelper() on the shared instance and returns its result.
  /// NOTE(review): `buf` looks like caller-owned scratch storage that the
  /// returned StringPiece may point into -- confirm the lifetime rules against
  /// the implementation.
00055   static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) {
00056     return singleton_->EscapeHelper(unescaped, buf);
00057   }
00058 
  /// Forwards to UnescapeHelper() on the shared instance and returns its
  /// result.
  /// NOTE(review): presumably *decoding_error is set when the input cannot be
  /// decoded, and `buf` plays the same scratch role as in Escape() -- confirm.
00066   static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf,
00067                               bool* decoding_error) {
00068     return singleton_->UnescapeHelper(escaped, buf, decoding_error);
00069   }
00070 
00081 
  /// Returns true if the packed pair (k1, k2) is present in auto_close_.
  /// std::binary_search requires auto_close_ to be sorted in ascending order.
  /// NOTE(review): which of k1/k2 is the open tag and which one closes it is
  /// not visible in this header -- verify against InitAutoClose().
00085   static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00086     return std::binary_search(singleton_->auto_close_.begin(),
00087                               singleton_->auto_close_.end(),
00088                               MakeKeywordPair(k1, k2));
00089   }
00090 
  /// Returns true if the packed pair (k1, k2) is present in contained_.
  /// std::binary_search requires contained_ to be sorted in ascending order.
  /// NOTE(review): the direction of the containment relation (k1 inside k2 or
  /// the reverse) is not visible here -- verify against InitContains().
00094   static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00095     return std::binary_search(singleton_->contained_.begin(),
00096                               singleton_->contained_.end(),
00097                               MakeKeywordPair(k1, k2));
00098   }
00099 
  /// Returns true if `keyword` is present in optionally_closed_.
  /// std::binary_search requires optionally_closed_ to be sorted ascending.
00105   static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) {
00106     return std::binary_search(singleton_->optionally_closed_.begin(),
00107                               singleton_->optionally_closed_.end(),
00108                               keyword);
00109   }
00110 
00111  private:
  /// Two keywords packed into one integer; see MakeKeywordPair() for layout.
00112   typedef int32 KeywordPair; 
00113   typedef std::vector<KeywordPair> KeywordPairVec;
00114   typedef std::vector<HtmlName::Keyword> KeywordVec;
00115 
  /// Private constructor: code outside this class cannot create instances.
00116   HtmlKeywords();
  /// NOTE(review): declared here but not referenced anywhere else in this
  /// header -- check whether it is still defined and used.
00117   const char* UnescapeAttributeValue();
  /// NOTE(review): the four Init*() helpers are defined out of line; judging
  /// by their names they populate the escape maps, auto_close_, contained_
  /// and optionally_closed_ respectively -- confirm.
00118   void InitEscapeSequences();
00119   void InitAutoClose();
00120   void InitContains();
00121   void InitOptionallyClosedKeywords();
00122 
  /// NOTE(review): defined out of line.  Judging by the parameters it tries to
  /// decode a single escape sequence (numeric when accumulate_numeric_code is
  /// set, otherwise named via `escape`) and append the result to `buf`,
  /// returning whether that succeeded -- confirm against the implementation.
00140   bool TryUnescape(bool accumulate_numeric_code,
00141                    uint32 numeric_value,
00142                    const GoogleString& escape,
00143                    bool was_terminated,
00144                    GoogleString* buf) const;
00145 
  /// Packs two keywords into one KeywordPair: k1 shifted left by 16 bits,
  /// OR-ed with k2.  Distinct (k1, k2) inputs map to distinct results only
  /// while k2's value fits in the low 16 bits.
  /// NOTE(review): assumes HtmlName::Keyword values are non-negative and small
  /// enough that the shift stays within int32 -- confirm against the enum.
00147   static KeywordPair MakeKeywordPair(HtmlName::Keyword k1,
00148                                      HtmlName::Keyword k2) {
00149     return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2);
00150   }
00151 
  /// NOTE(review): defined out of line.  By its name it appends to *kmap one
  /// packed pair for every combination of a keyword from k1_list with a
  /// keyword from k2_list; the list format (separator, case) is not visible
  /// here -- confirm.
00156   void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list,
00157                        KeywordPairVec* kmap);
  /// Calls AddCrossProduct() with auto_close_ as the destination vector.
00158   void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) {
00159     AddCrossProduct(k1_list, k2_list, &auto_close_);
00160   }
  /// Calls AddCrossProduct() with contained_ as the destination vector.
00161   void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) {
00162     AddCrossProduct(k1_list, k2_list, &contained_);
00163   }
00164 
  /// NOTE(review): defined out of line; presumably parses the keywords named
  /// in `klist` and adds them to *kset -- confirm.
00166   void AddToSet(const StringPiece& klist, KeywordVec* kset);
00167 
  /// The shared instance that every static accessor above forwards to.
00168   static HtmlKeywords* singleton_;
00169 
  /// Instance-level implementations behind the static Escape() / Unescape()
  /// wrappers; both are const and defined out of line.
00170   StringPiece EscapeHelper(const StringPiece& unescaped,
00171                            GoogleString* buf) const;
00172   StringPiece UnescapeHelper(const StringPiece& escaped,
00173                              GoogleString* buf,
00174                              bool* decoding_error) const;
00175 
  /// Two GoogleString -> const char* map types.  The first is given both
  /// CaseFoldStringHash and CaseFoldStringEqual; the second is given only
  /// CasePreserveStringHash and so uses sparse_hash_map's default equality.
  /// NOTE(review): per the typedef names these give case-insensitive and
  /// case-sensitive key matching respectively -- confirm in string_hash.h.
00181   typedef sparse_hash_map<
00182     GoogleString, const char*,
00183     CaseFoldStringHash,
00184     CaseFoldStringEqual> StringStringSparseHashMapInsensitive;
00185   typedef sparse_hash_map<
00186     GoogleString, const char*,
00187     CasePreserveStringHash> StringStringSparseHashMapSensitive;
00188 
  /// NOTE(review): presumably filled by InitEscapeSequences() and consulted by
  /// the escape/unescape helpers; what is keyed on what is not visible here.
00189   StringStringSparseHashMapInsensitive unescape_insensitive_map_;
00190   StringStringSparseHashMapSensitive unescape_sensitive_map_;
00191   StringStringSparseHashMapSensitive escape_map_;
00192 
  /// Indexed by HtmlName::Keyword in KeywordToString().
00193   CharStarVector keyword_vector_;
00194 
  /// Searched with std::binary_search by IsAutoClose(), IsContained() and
  /// IsOptionallyClosedTag(); each must be sorted before those are called.
00197   KeywordPairVec auto_close_;
00198   KeywordPairVec contained_;
00199   KeywordVec optionally_closed_;
00200 
  /// NOTE(review): project macro; by its name it disables copy construction
  /// and copy assignment for this class -- confirm in basictypes.h.
00201   DISALLOW_COPY_AND_ASSIGN(HtmlKeywords);
00202 };
00203 
00204 }  
00205 
00206 #endif  ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines