00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00018
00019 #ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_
00020 #define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_
00021
00022 #include <vector>
00023
00024 #include "base/scoped_ptr.h"
00025 #include "net/instaweb/util/public/basictypes.h"
00026 #include "net/instaweb/htmlparse/public/html_name.h"
00027 #include "net/instaweb/htmlparse/public/html_node.h"
00028 #include "net/instaweb/htmlparse/public/html_parser_types.h"
00029 #include "net/instaweb/util/public/string.h"
00030 #include "net/instaweb/util/public/string_util.h"
00031
00032 namespace net_instaweb {
00033
00034 class HtmlElement : public HtmlNode {
00035 public:
00042 enum CloseStyle {
00043 AUTO_CLOSE,
00044 IMPLICIT_CLOSE,
00045 EXPLICIT_CLOSE,
00046 BRIEF_CLOSE,
00047 UNCLOSED
00048 };
00049
00051 enum QuoteStyle {
00052 NO_QUOTE,
00053 SINGLE_QUOTE,
00054 DOUBLE_QUOTE
00055 };
00056
00057 class Attribute {
00058 public:
00062
00066
00070 const char* name_str() const { return name_.c_str(); }
00071
00075 HtmlName::Keyword keyword() const { return name_.keyword(); }
00076
00077 HtmlName name() const { return name_; }
00078 void set_name(const HtmlName& name) { name_ = name; }
00079
00082 const char* escaped_value() const { return escaped_value_.get(); }
00083
00109 const char* DecodedValueOrNull() const {
00110 if (!decoded_value_computed_) {
00111 ComputeDecodedValue();
00112 }
00113 return decoded_value_.get();
00114 }
00115
00116 void set_decoding_error(bool x) { decoding_error_ = x; }
00117 bool decoding_error() const {
00118 if (!decoded_value_computed_) {
00119 ComputeDecodedValue();
00120 }
00121 return decoding_error_;
00122 }
00123
00126 QuoteStyle quote_style() const { return quote_style_; }
00127
00129 const char* quote_str() const;
00130
00144
00149 void SetValue(const StringPiece& value);
00150
00154 void SetEscapedValue(const StringPiece& value);
00155
00156 void set_quote_style(QuoteStyle new_quote_style) {
00157 quote_style_ = new_quote_style;
00158 }
00159
00160 friend class HtmlElement;
00161
00162 private:
00163 void ComputeDecodedValue() const;
00164
00166 Attribute(const HtmlName& name, const StringPiece& escaped_value,
00167 QuoteStyle quote_style);
00168
00169 static inline void CopyValue(const StringPiece& src,
00170 scoped_array<char>* dst);
00171
00172 HtmlName name_;
00173 QuoteStyle quote_style_ : 8;
00174 mutable bool decoding_error_;
00175 mutable bool decoded_value_computed_;
00176
00185 scoped_array<char> escaped_value_;
00186
00203 mutable scoped_array<char> decoded_value_;
00204
00205 DISALLOW_COPY_AND_ASSIGN(Attribute);
00206 };
00207
00208 public:
00209 virtual ~HtmlElement();
00210
00211 virtual bool live() const { return (data_.get() != NULL) && data_->live_; }
00212 virtual void MarkAsDead(const HtmlEventListIterator& end);
00213
00216 void AddAttribute(const Attribute& attr);
00217
00227 void AddAttribute(const HtmlName& name,
00228 const StringPiece& decoded_value,
00229 QuoteStyle quote_style);
00231 void AddEscapedAttribute(const HtmlName& name,
00232 const StringPiece& escaped_value,
00233 QuoteStyle quote_style);
00234
00238 void DeleteAttribute(int i);
00239
00242 bool DeleteAttribute(HtmlName::Keyword keyword);
00243
00247 const Attribute* FindAttribute(HtmlName::Keyword keyword) const;
00248 Attribute* FindAttribute(HtmlName::Keyword keyword) {
00249 const HtmlElement* const_this = this;
00250 const Attribute* result = const_this->FindAttribute(keyword);
00251 return const_cast<Attribute*>(result);
00252 }
00253
00263 const char* AttributeValue(HtmlName::Keyword name) const {
00264 const Attribute* attribute = FindAttribute(name);
00265 if (attribute != NULL) {
00266 return attribute->DecodedValueOrNull();
00267 }
00268 return NULL;
00269 }
00270
00274 const char* name_str() const { return data_->name_.c_str(); }
00275
00279 HtmlName::Keyword keyword() const { return data_->name_.keyword(); }
00280
00281 const HtmlName& name() const { return data_->name_; }
00282
00286 void set_name(const HtmlName& new_tag) { data_->name_ = new_tag; }
00287
00288 int attribute_size() const { return data_->attributes_.size(); }
00289 const Attribute& attribute(int i) const { return *data_->attributes_[i]; }
00290 Attribute& attribute(int i) { return *data_->attributes_[i]; }
00291
00292 friend class HtmlParse;
00293 friend class HtmlLexer;
00294
00295 CloseStyle close_style() const { return data_->close_style_; }
00296 void set_close_style(CloseStyle style) { data_->close_style_ = style; }
00297
00300 void ToString(GoogleString* buf) const;
00301 void DebugPrint() const;
00302
00303 int begin_line_number() const { return data_->begin_line_number_; }
00304 int end_line_number() const { return data_->end_line_number_; }
00305
00306 protected:
00307 virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
00308 HtmlEventList* queue);
00309
00310 virtual HtmlEventListIterator begin() const { return data_->begin_; }
00311 virtual HtmlEventListIterator end() const { return data_->end_; }
00312
00313 private:
00316 struct Data {
00317 Data(const HtmlName& name,
00318 const HtmlEventListIterator& begin,
00319 const HtmlEventListIterator& end);
00320 ~Data();
00321 inline void Clear();
00322
00334 unsigned begin_line_number_ : 24;
00335 unsigned live_ : 8;
00336 unsigned end_line_number_ : 24;
00337 CloseStyle close_style_ : 8;
00338
00339 HtmlName name_;
00340 std::vector<Attribute*> attributes_;
00341 HtmlEventListIterator begin_;
00342 HtmlEventListIterator end_;
00343 };
00344
00348 void set_begin(const HtmlEventListIterator& begin) { data_->begin_ = begin; }
00349 void set_end(const HtmlEventListIterator& end) { data_->end_ = end; }
00350
00351 void set_begin_line_number(int line) { data_->begin_line_number_ = line; }
00352 void set_end_line_number(int line) { data_->end_line_number_ = line; }
00353
00355 HtmlElement(HtmlElement* parent, const HtmlName& name,
00356 const HtmlEventListIterator& begin,
00357 const HtmlEventListIterator& end);
00358
00364 void FreeData() { data_.reset(NULL); }
00365
00366 scoped_ptr<Data> data_;
00367
00368 DISALLOW_COPY_AND_ASSIGN(HtmlElement);
00369 };
00370
00371 }
00372
00373 #endif ///< NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_