19 #ifndef PAGESPEED_KERNEL_HTML_HTML_LEXER_H_
20 #define PAGESPEED_KERNEL_HTML_HTML_LEXER_H_
33 namespace net_instaweb {
// Takes a character buffer `text` together with its length `size`.
// NOTE(review): presumably feeds the next `size` bytes of the document to the
// lexer; the implementation is not visible in this header -- confirm in the
// corresponding source file.
55 void Parse(
const char* text,
int size);
// Per-character handlers.  Every function in this group has the same shape:
// it is declared inline, returns nothing, and receives a single character `c`.
// NOTE(review): the names suggest one handler per lexer state (start, tag,
// comment, CDATA, attribute, literal/script tag, directive, bogus comment),
// each invoked with the next input character.  The bodies are not visible
// here -- confirm against the implementation before relying on this.
100 inline void EvalStart(
char c);
101 inline void EvalTag(
char c);
102 inline void EvalTagOpen(
char c);
103 inline void EvalTagCloseNoName(
char c);
104 inline void EvalTagClose(
char c);
105 inline void EvalTagBriefClose(
char c);
106 inline void EvalCommentStart1(
char c);
107 inline void EvalCommentStart2(
char c);
108 inline void EvalCommentBody(
char c);
109 inline void EvalCommentEnd1(
char c);
110 inline void EvalCommentEnd2(
char c);
111 inline void EvalCdataStart1(
char c);
112 inline void EvalCdataStart2(
char c);
113 inline void EvalCdataStart3(
char c);
114 inline void EvalCdataStart4(
char c);
115 inline void EvalCdataStart5(
char c);
116 inline void EvalCdataStart6(
char c);
117 inline void EvalCdataBody(
char c);
118 inline void EvalCdataEnd1(
char c);
119 inline void EvalCdataEnd2(
char c);
120 inline void EvalAttribute(
char c);
121 inline void EvalAttrName(
char c);
122 inline void EvalAttrNameSpace(
char c);
123 inline void EvalAttrEq(
char c);
124 inline void EvalAttrVal(
char c);
125 inline void EvalAttrValSq(
char c);
126 inline void EvalAttrValDq(
char c);
127 inline void EvalLiteralTag(
char c);
128 inline void EvalScriptTag(
char c);
129 inline void EvalDirective(
char c);
130 inline void EvalBogusComment(
char c);
// Attribute helpers.  `has_value` is passed by the caller to both functions.
// NOTE(review): presumably it distinguishes attributes written with a value
// from bare attribute names -- confirm in the implementation.
136 void MakeAttribute(
bool has_value);
// Takes the current character `c`, the `has_value` flag, and `brief_close`.
// NOTE(review): `brief_close` presumably signals that the attribute was ended
// by a brief (self-closing) tag termination -- confirm in the implementation.
137 void FinishAttribute(
char c,
bool has_value,
bool brief_close);
// Emit helpers and restart.  Only the signatures are visible in this header.
// NOTE(review): presumably these hand completed tokens (tag open, brief close,
// directive) to the parser -- confirm in the implementation.
// `allow_implicit_close` is a caller-supplied flag; its exact effect is not
// visible here.
142 void EmitTagOpen(
bool allow_implicit_close);
144 void EmitTagBriefClose();
145 void EmitDirective();
// Takes a single character `c`.
// NOTE(review): presumably resets the lexer state and re-processes `c` --
// confirm in the implementation.
146 void Restart(
char c);
// Takes a tag name and returns an HtmlElement pointer.
// NOTE(review): presumably searches the open-element stack for `tag`, removes
// the match, and returns it (null when nothing matches) -- confirm in the
// implementation; none of that is visible in this header.
161 HtmlElement* PopElementMatchingTag(const StringPiece& tag);
// Reports whether `c` has its high bit (0x80) set, i.e. whether the byte lies
// outside the 7-bit ASCII range.
static inline bool IsI18nChar(char c) {
  // Compare as an unsigned byte so the answer is the same whether plain
  // `char` is signed or unsigned on the target platform.
  const unsigned char byte = static_cast<unsigned char>(c);
  return byte >= 0x80;
}
// Character-class predicates: each takes one character and returns a bool.
// Only the declarations appear here, so the accepted character sets are not
// visible.
// NOTE(review): presumably they test whether `c` may start a tag name,
// continue a tag name, or appear in an attribute name -- confirm against the
// definitions.
172 static inline bool IsLegalTagFirstChar(
char c);
174 static inline bool IsLegalTagChar(
char c);
177 static inline bool IsLegalAttrNameChar(
char c);
// Lexer state (data members).  Ownership and initial values are not visible
// in this excerpt.
// Pointer to the associated HtmlParse object.
220 HtmlParse* html_parse_;
227 bool has_attr_value_;
// Pointer to an HtmlElement.
// NOTE(review): presumably the element currently being built -- confirm.
228 HtmlElement* element_;
233 bool script_html_comment_;
234 bool script_html_comment_script_;
235 bool discard_until_start_state_for_error_recovery_;
239 ContentType content_type_;
// Stack of HtmlElement pointers.
// NOTE(review): presumably the currently open elements -- confirm.
242 std::vector<HtmlElement*> element_stack_;
// NOTE(review): presumably the value reported by size_limit_exceeded() --
// confirm.
246 bool size_limit_exceeded_;
// 64-bit byte counter.
// NOTE(review): presumably compared against the limit set via
// set_size_limit() -- confirm.
250 int64 num_bytes_parsed_;
bool IsImplicitlyClosedTag(HtmlName::Keyword keyword) const
Determines whether a tag should be terminated in HTML.
bool IsOptionallyClosedTag(HtmlName::Keyword keyword) const
Determines whether it's OK to leave a tag unclosed.
QuoteStyle
Various ways things can be quoted (or not)
Definition: html_element.h:60
void FinishParse()
Completes parse, reporting any leftover text as a final HtmlCharacterEvent.
void DebugPrintStack()
Print element stack to stdout (for debugging).
const DocType & doctype() const
Definition: html_lexer.h:89
void StartParse(const StringPiece &id, const ContentType &content_type)
Initialize a new parse session, id is only used for error messages.
Definition: html_parse.h:88
Definition: html_element.h:42
void Parse(const char *text, int size)
void set_size_limit(int64 x)
Sets the limit on the maximum number of bytes that should be parsed.
Definition: html_lexer.h:92
bool size_limit_exceeded() const
Definition: html_lexer.h:96
std::string GoogleString
PAGESPEED_KERNEL_BASE_STRING_H_.
Definition: string.h:24
Style
Definition: html_element.h:50
bool TagAllowsBriefTermination(HtmlName::Keyword keyword) const
Determines whether a tag can be terminated briefly (e.g. <tag/>)
Definition: content_type.h:31
Keyword
Definition: html_name.h:39
static bool IsLiteralTag(HtmlName::Keyword keyword)
static bool IsSometimesLiteralTag(HtmlName::Keyword keyword)
Definition: html_lexer.h:45
HtmlElement * Parent() const