Page Speed Optimization Libraries  1.3.25.1
net/instaweb/htmlparse/public/html_keywords.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Google Inc.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00018 
00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
00021 
00022 #include <algorithm>
00023 #include <map>
00024 #include <vector>
00025 #include "net/instaweb/util/public/basictypes.h"
00026 #include "net/instaweb/htmlparse/public/html_name.h"
00027 #include "net/instaweb/util/public/string.h"
00028 #include "net/instaweb/util/public/string_util.h"
00029 
00030 namespace net_instaweb {
00031 
00032 class HtmlKeywords {
        // HTML keyword lookup tables exposed through static methods.  Every
        // inline accessor below forwards to the instance held in singleton_ and
        // dereferences it without a null check, so singleton_ must already be
        // set when they are called.
        // NOTE(review): presumably Init() creates singleton_ and ShutDown()
        // releases it -- confirm against the implementation file.
00033  public:
        // Declared here, defined elsewhere.
        // NOTE(review): presumably builds the singleton and its tables; must
        // run before any accessor below -- confirm.
00038   static void Init();
00039 
        // Declared here, defined elsewhere.
        // NOTE(review): presumably tears down what Init() set up -- confirm.
00042   static void ShutDown();
00043 
        // Returns the keyword_vector_ entry indexed by `keyword`.  The index is
        // used as-is; no range check is performed in this function.
00045   static const char* KeywordToString(HtmlName::Keyword keyword) {
00046     return singleton_->keyword_vector_[keyword];
00047   }
00048 
        // Forwards both arguments to EscapeHelper() on the singleton and returns
        // its result.
        // NOTE(review): `buf` looks like caller-owned scratch storage that the
        // returned StringPiece may point into -- confirm lifetime rules.
00051   static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) {
00052     return singleton_->EscapeHelper(unescaped, buf);
00053   }
00054 
        // Forwards all arguments to UnescapeHelper() on the singleton and
        // returns its result; `decoding_error` is passed through untouched.
        // NOTE(review): presumably *decoding_error is set when the input cannot
        // be decoded -- confirm in UnescapeHelper().
00062   static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf,
00063                               bool* decoding_error) {
00064     return singleton_->UnescapeHelper(escaped, buf, decoding_error);
00065   }
00066 
00077 
        // Returns true when the packed pair (k1, k2) is present in auto_close_.
        // std::binary_search only gives a meaningful answer on a sorted range,
        // so auto_close_ must be sorted before this is called.
        // NOTE(review): which of k1/k2 is the tag being closed is not visible
        // in this header -- confirm before relying on argument order.
00081   static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00082     return std::binary_search(singleton_->auto_close_.begin(),
00083                               singleton_->auto_close_.end(),
00084                               MakeKeywordPair(k1, k2));
00085   }
00086 
        // Returns true when the packed pair (k1, k2) is present in contained_.
        // Same sortedness requirement as IsAutoClose(): contained_ must be
        // sorted for std::binary_search to be valid.
        // NOTE(review): the containment direction (k1 in k2 or k2 in k1) is not
        // visible in this header -- confirm.
00090   static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) {
00091     return std::binary_search(singleton_->contained_.begin(),
00092                               singleton_->contained_.end(),
00093                               MakeKeywordPair(k1, k2));
00094   }
00095 
        // Returns true when `keyword` is present in optionally_closed_, which
        // must be sorted for std::binary_search to be valid.
00101   static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) {
00102     return std::binary_search(singleton_->optionally_closed_.begin(),
00103                               singleton_->optionally_closed_.end(),
00104                               keyword);
00105   }
00106 
00107  private:
        // Two keywords packed into a single int32 by MakeKeywordPair().
00108   typedef int32 KeywordPair; 
00109   typedef std::vector<KeywordPair> KeywordPairVec;
00110   typedef std::vector<HtmlName::Keyword> KeywordVec;
00111 
        // Private constructor: instances cannot be created from outside the
        // class.
00112   HtmlKeywords();
        // NOTE(review): declared but not referenced anywhere in this header;
        // check whether a definition and callers still exist.
00113   const char* UnescapeAttributeValue();
        // Table-building helpers, defined elsewhere.
        // NOTE(review): presumably each fills the correspondingly named
        // member(s) below and is invoked during construction -- confirm.
00114   void InitEscapeSequences();
00115   void InitAutoClose();
00116   void InitContains();
00117   void InitOptionallyClosedKeywords();
00118 
        // Declared here, defined elsewhere; const member function.
        // NOTE(review): presumably resolves one escape sequence and appends the
        // decoded text to *buf, returning whether it succeeded -- confirm the
        // meaning of each flag in the implementation.
00136   bool TryUnescape(bool accumulate_numeric_code,
00137                    uint32 numeric_value,
00138                    const GoogleString& escape,
00139                    bool was_terminated,
00140                    GoogleString* buf) const;
00141 
        // Packs two keywords into one KeywordPair: k1 is shifted left by 16
        // bits and k2 is OR-ed into the low bits.  Distinct pairs map to
        // distinct values only if every keyword value fits in 16 bits.
        // NOTE(review): confirm HtmlName::Keyword stays within that range.
00143   static KeywordPair MakeKeywordPair(HtmlName::Keyword k1,
00144                                      HtmlName::Keyword k2) {
00145     return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2);
00146   }
00147 
        // Declared here, defined elsewhere.
        // NOTE(review): by its name and callers, presumably adds a KeywordPair
        // to *kmap for every combination of a keyword from k1_list with one
        // from k2_list -- confirm the list format in the implementation.
00152   void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list,
00153                        KeywordPairVec* kmap);
        // Convenience wrapper: AddCrossProduct() targeting auto_close_.
00154   void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) {
00155     AddCrossProduct(k1_list, k2_list, &auto_close_);
00156   }
        // Convenience wrapper: AddCrossProduct() targeting contained_.
00157   void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) {
00158     AddCrossProduct(k1_list, k2_list, &contained_);
00159   }
00160 
        // Declared here, defined elsewhere.
        // NOTE(review): presumably adds each keyword named in klist to *kset --
        // confirm.
00162   void AddToSet(const StringPiece& klist, KeywordVec* kset);
00163 
        // The instance every static accessor above dereferences.
00164   static HtmlKeywords* singleton_;
00165 
        // Instance-level implementations behind the static Escape()/Unescape()
        // wrappers; both are const member functions defined elsewhere.
00166   StringPiece EscapeHelper(const StringPiece& unescaped,
00167                            GoogleString* buf) const;
00168   StringPiece UnescapeHelper(const StringPiece& escaped,
00169                              GoogleString* buf,
00170                              bool* decoding_error) const;
00171 
        // String-to-string maps; the "insensitive" variant orders its keys with
        // StringCompareInsensitive, the "sensitive" variant with the default
        // std::map ordering.
        // NOTE(review): presumably populated by InitEscapeSequences() and
        // consulted by the escape/unescape helpers -- confirm.
00172   typedef std::map<GoogleString, GoogleString,
00173                    StringCompareInsensitive> StringStringMapInsensitive;
00174   typedef std::map<GoogleString, GoogleString> StringStringMapSensitive;
00175   StringStringMapInsensitive unescape_insensitive_map_;
00176   StringStringMapSensitive unescape_sensitive_map_;
00177   StringStringMapSensitive escape_map_;
        // Indexed directly by HtmlName::Keyword in KeywordToString().
00178   CharStarVector keyword_vector_;
00179 
        // Searched with std::binary_search by IsAutoClose(), IsContained() and
        // IsOptionallyClosedTag() respectively, so each must be sorted before
        // those accessors are used.
00182   KeywordPairVec auto_close_;
00183   KeywordPairVec contained_;
00184   KeywordVec optionally_closed_;
00185 
00186   DISALLOW_COPY_AND_ASSIGN(HtmlKeywords);
00187 };
00188 
00189 }  
00190 
00191 #endif  // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines