Page Speed Optimization Libraries  1.4.26.1
net/instaweb/htmlparse/public/html_keywords.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Google Inc.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00018 
00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00021 
00022 #include <algorithm>
00023 #include <vector>
00024 #include "net/instaweb/util/public/basictypes.h"
00025 #include "net/instaweb/htmlparse/public/html_name.h"
00026 #include "net/instaweb/util/public/sparse_hash_map.h"
00027 #include "net/instaweb/util/public/string.h"
00028 #include "net/instaweb/util/public/string_hash.h"
00029 #include "net/instaweb/util/public/string_util.h"
00030 
00031 namespace net_instaweb {
00032 
00033 class HtmlKeywords {
        // Keyword-related lookup tables for the HTML parser, reached through a
        // single static instance (singleton_).  Every public entry point below
        // is a static function that dereferences singleton_ without a null
        // check, so the instance must exist before any of them is called.
        // NOTE(review): presumably Init() creates the instance and ShutDown()
        // destroys it; both are defined outside this header -- confirm there.
00034  public:
        // Static setup hook; declaration only, the definition is not in this
        // header.  Presumably must run before the static accessors are used
        // (they dereference singleton_) -- confirm in the implementation.
00039   static void Init();
00040 
        // Static teardown hook; declaration only.  Presumably the counterpart
        // of Init() -- confirm in the implementation.
00043   static void ShutDown();
00044 
        // Returns the entry of the singleton's keyword_vector_ at index
        // 'keyword'.  The index is not range-checked here.
00046   static const char* KeywordToString(HtmlName::Keyword keyword) {
00047     return singleton_->keyword_vector_[keyword];
00048   }
00049 
        // Static front end for EscapeHelper(): forwards 'unescaped' and 'buf'
        // to the singleton and returns its result unchanged.
        // NOTE(review): 'buf' looks like caller-owned scratch storage that may
        // back the returned StringPiece -- confirm against EscapeHelper().
00052   static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) {
00053     return singleton_->EscapeHelper(unescaped, buf);
00054   }
00055 
        // Static front end for UnescapeHelper(): forwards 'escaped', 'buf' and
        // 'decoding_error' to the singleton and returns its result unchanged.
        // NOTE(review): presumably *decoding_error reports input that could
        // not be decoded -- the exact contract lives in UnescapeHelper().
00063   static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf,
00064                               bool* decoding_error) {
00065     return singleton_->UnescapeHelper(escaped, buf, decoding_error);
00066   }
00067 
00078 
        // True iff the packed pair (k1, k2) is present in the singleton's
        // auto_close_ table.  The lookup is a std::binary_search, so the table
        // has to be sorted by the time this is called.
        // NOTE(review): by its name the table records which tag pairs trigger
        // automatic closing; which of k1/k2 is the open tag and which is the
        // incoming tag is not visible here -- confirm in InitAutoClose().
00082   static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00083     return std::binary_search(singleton_->auto_close_.begin(),
00084                               singleton_->auto_close_.end(),
00085                               MakeKeywordPair(k1, k2));
00086   }
00087 
        // True iff the packed pair (k1, k2) is present in the singleton's
        // contained_ table.  Same binary-search lookup (and therefore the same
        // sortedness requirement) as IsAutoClose().
        // NOTE(review): the direction of the containment relation between k1
        // and k2 is not visible here -- confirm in InitContains().
00091   static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00092     return std::binary_search(singleton_->contained_.begin(),
00093                               singleton_->contained_.end(),
00094                               MakeKeywordPair(k1, k2));
00095   }
00096 
        // True iff 'keyword' is present in the singleton's optionally_closed_
        // list.  Uses std::binary_search, so that list must be sorted too.
00102   static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) {
00103     return std::binary_search(singleton_->optionally_closed_.begin(),
00104                               singleton_->optionally_closed_.end(),
00105                               keyword);
00106   }
00107 
00108  private:
        // Two keywords packed into one 32-bit integer; see MakeKeywordPair().
00109   typedef int32 KeywordPair; 
00110   typedef std::vector<KeywordPair> KeywordPairVec;
00111   typedef std::vector<HtmlName::Keyword> KeywordVec;
00112 
        // Private constructor: instances cannot be created from outside the
        // class.
00113   HtmlKeywords();
        // Declaration only; nothing else in this header refers to it.
        // NOTE(review): purpose cannot be determined from here.
00114   const char* UnescapeAttributeValue();
        // Table-population helpers, all defined outside this header.
        // NOTE(review): going by their names they fill, respectively, the
        // escape/unescape maps, auto_close_, contained_ and
        // optionally_closed_ -- confirm in the implementation.
00115   void InitEscapeSequences();
00116   void InitAutoClose();
00117   void InitContains();
00118   void InitOptionallyClosedKeywords();
00119 
        // Declaration only.
        // NOTE(review): the parameter names suggest it attempts to decode one
        // candidate escape sequence (either a numeric code carried in
        // 'numeric_value' or a named one in 'escape') and writes to 'buf';
        // the meaning of the bool result is not visible here -- confirm.
00137   bool TryUnescape(bool accumulate_numeric_code,
00138                    uint32 numeric_value,
00139                    const GoogleString& escape,
00140                    bool was_terminated,
00141                    GoogleString* buf) const;
00142 
        // Packs two keywords into a single KeywordPair: k1 is shifted into the
        // bits above bit 15 and k2 is OR-ed into the low bits.  Distinct pairs
        // only yield distinct values while k2 fits in 16 bits.
00144   static KeywordPair MakeKeywordPair(HtmlName::Keyword k1,
00145                                      HtmlName::Keyword k2) {
00146     return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2);
00147   }
00148 
        // Declaration only; writes into the vector passed as 'kmap'.
        // NOTE(review): presumably parses both keyword lists and appends a
        // KeywordPair for every (k1, k2) combination -- confirm.
00153   void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list,
00154                        KeywordPairVec* kmap);
        // AddCrossProduct() targeting the auto_close_ table.
00155   void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) {
00156     AddCrossProduct(k1_list, k2_list, &auto_close_);
00157   }
        // AddCrossProduct() targeting the contained_ table.
00158   void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) {
00159     AddCrossProduct(k1_list, k2_list, &contained_);
00160   }
00161 
        // Declaration only; writes into the vector passed as 'kset'.
        // NOTE(review): presumably adds each keyword named in 'klist' -- confirm.
00163   void AddToSet(const StringPiece& klist, KeywordVec* kset);
00164 
        // The one shared instance used by every static accessor above.  It is
        // only declared here; it is defined and assigned outside this header.
00165   static HtmlKeywords* singleton_;
00166 
        // Instance-level implementations behind the static Escape() and
        // Unescape() wrappers; defined outside this header.
00167   StringPiece EscapeHelper(const StringPiece& unescaped,
00168                            GoogleString* buf) const;
00169   StringPiece UnescapeHelper(const StringPiece& escaped,
00170                              GoogleString* buf,
00171                              bool* decoding_error) const;
00172 
        // String -> C-string maps in two flavours.  The "Insensitive" variant
        // hashes and compares keys with CaseFoldStringHash /
        // CaseFoldStringEqual; the "Sensitive" variant hashes with
        // CasePreserveStringHash and uses the map's default key equality.
00178   typedef sparse_hash_map<
00179     GoogleString, const char*,
00180     CaseFoldStringHash,
00181     CaseFoldStringEqual> StringStringSparseHashMapInsensitive;
00182   typedef sparse_hash_map<
00183     GoogleString, const char*,
00184     CasePreserveStringHash> StringStringSparseHashMapSensitive;
00185 
        // Escape/unescape lookup tables; they are filled and consulted
        // outside this header.
00186   StringStringSparseHashMapInsensitive unescape_insensitive_map_;
00187   StringStringSparseHashMapSensitive unescape_sensitive_map_;
00188   StringStringSparseHashMapSensitive escape_map_;
00189 
        // Indexed by HtmlName::Keyword in KeywordToString().
00190   CharStarVector keyword_vector_;
00191 
        // Queried with std::binary_search by IsAutoClose(), IsContained() and
        // IsOptionallyClosedTag(); each must be sorted before those are used.
00194   KeywordPairVec auto_close_;
00195   KeywordPairVec contained_;
00196   KeywordVec optionally_closed_;
00197 
        // Project macro defined elsewhere; by its name it forbids copying and
        // assigning HtmlKeywords objects.
00198   DISALLOW_COPY_AND_ASSIGN(HtmlKeywords);
00199 };
00200 
00201 }  
00202 
00203 #endif  // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines