Page Speed Optimization Libraries  1.9.32.2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
rewrite_driver.h
Go to the documentation of this file.
1 /*
2  * Copyright 2010 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
20 #define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
21 
22 #include <map>
23 #include <set>
24 #include <vector>
25 
26 #include "base/logging.h"
29 #include "net/instaweb/http/public/request_context.h"
40 #include "pagespeed/kernel/base/abstract_mutex.h"
41 #include "pagespeed/kernel/base/basictypes.h"
42 #include "pagespeed/kernel/base/printf_format.h"
43 #include "pagespeed/kernel/base/proto_util.h"
44 #include "pagespeed/kernel/base/scoped_ptr.h"
45 #include "pagespeed/kernel/base/string.h"
46 #include "pagespeed/kernel/base/string_util.h"
47 #include "pagespeed/kernel/base/thread_annotations.h"
48 #include "pagespeed/kernel/base/thread_system.h"
49 #include "pagespeed/kernel/html/html_element.h"
50 #include "pagespeed/kernel/html/html_parse.h"
51 #include "pagespeed/kernel/http/content_type.h"
52 #include "pagespeed/kernel/http/google_url.h"
53 #include "pagespeed/kernel/http/response_headers.h"
54 #include "pagespeed/kernel/http/user_agent_matcher.h"
55 #include "pagespeed/kernel/thread/queued_worker_pool.h"
56 #include "pagespeed/kernel/thread/scheduler.h"
57 #include "pagespeed/kernel/util/categorized_refcount.h"
58 #include "pagespeed/kernel/util/url_segment_encoder.h"
59 
60 namespace net_instaweb {
61 
62 class AbstractLogRecord;
63 class AbstractMutex;
64 class AbstractPropertyPage;
65 class AsyncFetch;
66 class CriticalCssResult;
67 class CriticalKeys;
68 class CriticalLineInfo;
69 class DebugFilter;
70 class DomStatsFilter;
71 class DomainRewriteFilter;
72 class FallbackPropertyPage;
73 class FileSystem;
74 class FlushEarlyInfo;
75 class FlushEarlyRenderInfo;
76 class Function;
77 class HtmlFilter;
78 class HtmlWriterFilter;
79 class MessageHandler;
80 class OutputResource;
81 class PropertyPage;
82 class RequestHeaders;
83 class RequestProperties;
84 class RequestTrace;
85 class ResourceContext;
86 class ResourceNamer;
87 class RewriteDriverPool;
88 class RewriteFilter;
89 class SplitHtmlConfig;
90 class Statistics;
91 class UrlAsyncFetcher;
92 class UrlLeftTrimFilter;
93 class UrlNamer;
94 class Writer;
95 
98 class RewriteDriver : public HtmlParse {
99  public:
102  kWriteFailed,
103  kNoResolutionNeeded,
104  kSuccess
105  };
106 
108  enum WaitMode {
114  };
116 
// Three-valued XHTML status: unknown, is XHTML, or is not XHTML.
// The driver keeps one value of this type in the xhtml_status_ member.
128  enum XhtmlStatus {
129  kXhtmlUnknown,
130  kIsXhtml,
131  kIsNotXhtml
132  };
133 
136  kInlineUnauthorizedResources,
137  kInlineOnlyAuthorizedResources
138  };
139 
// Two-valued selector passed to CreateInputResource() and MayRewriteUrl()
// alongside an InlineAuthorizationPolicy.
// NOTE(review): presumably states whether the resource will be inlined or
// used generally -- confirm against the original header comments.
141  enum IntendedFor {
142  kIntendedForInlining,
143  kIntendedForGeneral
144  };
145 
149  static const char kDomCohort[];
151  static const char kBeaconCohort[];
152 
155  static const char kLastRequestTimestamp[];
157  static const char kParseSizeLimitExceeded[];
159  static const char kSubresourcesPropertyName[];
161  static const char kStatusCodePropertyName[];
162 
163  RewriteDriver(MessageHandler* message_handler,
164  FileSystem* file_system,
165  UrlAsyncFetcher* url_async_fetcher);
166 
169  virtual ~RewriteDriver();
170 
173  RewriteDriver* Clone();
174 
180  void Clear();
181 
// Initialize statistics for all filters that need it.
183  static void InitStats(Statistics* statistics);
184 
// Initialize statics. Initialize/Terminate calls must be paired.
186  static void Initialize();
187  static void Terminate();
188 
190  static GoogleString DeadlineExceededMessage(StringPiece filter_name);
191 
194  void SetServerContext(ServerContext* server_context);
195 
198  bool MayCacheExtendCss() const;
199  bool MayCacheExtendImages() const;
200  bool MayCacheExtendPdfs() const;
201  bool MayCacheExtendScripts() const;
202 
// Read-only accessor: returns the stored user-agent string (user_agent_) by
// const reference.
203  const GoogleString& user_agent() const { return user_agent_; }
204 
205  void SetUserAgent(const StringPiece& user_agent_string);
206 
// Returns the raw pointer held by the request_properties_ scoped_ptr via
// get(); the scoped_ptr member keeps holding the object.
207  const RequestProperties* request_properties() const {
208  return request_properties_.get();
209  }
210 
// Reinitializes request_properties_, clearing any cached values.
212  void ClearRequestProperties();
213 
// Forwards to request_context_->using_spdy(). request_context_ is
// dereferenced here without a null check.
215  bool using_spdy() const { return request_context_->using_spdy(); }
216 
// Getter/setter pair for the write_property_cache_dom_cohort_ flag.
217  bool write_property_cache_dom_cohort() const {
218  return write_property_cache_dom_cohort_;
219  }
220  void set_write_property_cache_dom_cohort(bool x) {
221  write_property_cache_dom_cohort_ = x;
222  }
223 
// Returns request_context_ by value, i.e. a copy of the RequestContextPtr
// handle rather than a reference to the member.
224  RequestContextPtr request_context() { return request_context_; }
// Setter counterpart; only declared here, so its body is not visible.
225  void set_request_context(const RequestContextPtr& x);
226 
229  RequestTrace* trace_context();
230 
233  void TracePrintf(const char* fmt, ...);
234 
// Returns response_headers_ for mutation, or NULL once flush_occurred_ is
// set, so callers must check the result before dereferencing it.
237  ResponseHeaders* mutable_response_headers() {
238  return flush_occurred_ ? NULL : response_headers_;
239  }
240 
// Returns the current response_headers_ pointer as pointer-to-const. Unlike
// mutable_response_headers(), this does not consult flush_occurred_.
247  const ResponseHeaders* response_headers() {
248  return response_headers_;
249  }
250 
// Stores the given pointer in response_headers_. No copy of the headers is
// made here; the driver refers to the caller-supplied object.
254  void set_response_headers_ptr(ResponseHeaders* headers) {
255  response_headers_ = headers;
256  }
257 
263  void SetRequestHeaders(const RequestHeaders& headers);
264 
// Returns the raw pointer held by the request_headers_ scoped_ptr via get();
// the scoped_ptr member keeps holding the object.
265  const RequestHeaders* request_headers() const {
266  return request_headers_.get();
267  }
268 
// Convenience accessor for the UserAgentMatcher of the attached
// ServerContext. DCHECKs that server_context() is non-NULL and then
// dereferences it unconditionally.
269  UserAgentMatcher* user_agent_matcher() const {
270  DCHECK(server_context() != NULL);
271  return server_context()->user_agent_matcher();
272  }
273 
278  void AddFilters();
279 
283  void AddOwnedEarlyPreRenderFilter(HtmlFilter* filter);
284 
286  void PrependOwnedPreRenderFilter(HtmlFilter* filter);
288  void AppendOwnedPreRenderFilter(HtmlFilter* filter);
289 
// Adds a filter to the end of the post-render chain, taking ownership.
291  void AddOwnedPostRenderFilter(HtmlFilter* filter);
293  void AddUnownedPostRenderFilter(HtmlFilter* filter);
294 
306  void AppendRewriteFilter(RewriteFilter* filter);
307 
310  void PrependRewriteFilter(RewriteFilter* filter);
311 
320 
326  void SetWriter(Writer* writer);
327 
// Returns the writer_ pointer (the member that SetWriter() is presumably
// responsible for assigning -- its body is not visible here).
328  Writer* writer() const { return writer_; }
329 
358  bool FetchResource(const StringPiece& url, AsyncFetch* fetch);
359 
377  void FetchInPlaceResource(const GoogleUrl& gurl, bool proxy_mode,
378  AsyncFetch* async_fetch);
379 
386  bool FetchOutputResource(const OutputResourcePtr& output_resource,
387  RewriteFilter* filter,
388  AsyncFetch* async_fetch);
389 
396  OutputResourcePtr DecodeOutputResource(const GoogleUrl& url,
397  RewriteFilter** filter) const;
398 
404  bool DecodeOutputResourceName(const GoogleUrl& url,
405  const RewriteOptions* options_to_use,
406  const UrlNamer* url_namer,
407  ResourceNamer* name_out,
408  OutputResourceKind* kind_out,
409  RewriteFilter** filter_out) const;
410 
423  StringPiece url,
424  GoogleString* error_out,
425  RewriteContext::CacheLookupResultCallback* callback);
426 
428  bool DecodeUrl(const GoogleUrl& url,
429  StringVector* decoded_urls) const;
430 
433  bool DecodeUrlGivenOptions(const GoogleUrl& url,
434  const RewriteOptions* options,
435  const UrlNamer* url_namer,
436  StringVector* decoded_urls) const;
437 
// Plain accessors for the file_system_ and url_async_fetcher_ pointers.
438  FileSystem* file_system() { return file_system_; }
439  UrlAsyncFetcher* async_fetcher() { return url_async_fetcher_; }
440 
445  void SetSessionFetcher(UrlAsyncFetcher* f);
446 
447  UrlAsyncFetcher* distributed_fetcher() { return distributed_async_fetcher_; }
450  distributed_async_fetcher_ = fetcher;
451  }
452 
458 
// Returns the server_context_ pointer as stored; callers such as
// user_agent_matcher() check it for NULL themselves.
459  ServerContext* server_context() const { return server_context_; }
460  Statistics* statistics() const;
461 
464  set_options_for_pool(NULL, options);
465  }
466 
470  controlling_pool_ = pool;
471  options_.reset(options);
472  }
473 
475  RewriteDriverPool* controlling_pool() { return controlling_pool_; }
476 
// Returns the raw pointer held by the options_ scoped_ptr via get(), as
// pointer-to-const; the scoped_ptr member keeps holding the object.
478  const RewriteOptions* options() const { return options_.get(); }
479 
483  virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
484  const ContentType& content_type);
485 
492  virtual void FinishParse();
493 
497  void FinishParseAsync(Function* callback);
498 
502  void InfoAt(const RewriteContext* context,
503  const char* msg, ...) INSTAWEB_PRINTF_FORMAT(3, 4);
504 
509 
516  OutputResourcePtr CreateOutputResourceFromResource(
517  const StringPiece& filter_id,
518  const UrlSegmentEncoder* encoder,
519  const ResourceContext* data,
520  const ResourcePtr& input_resource,
521  OutputResourceKind kind,
522  GoogleString* failure_reason);
523 
540  OutputResourcePtr CreateOutputResourceWithPath(
541  const StringPiece& mapped_path, const StringPiece& unmapped_path,
542  const StringPiece& base_url, const StringPiece& filter_id,
543  const StringPiece& name, OutputResourceKind kind,
544  GoogleString* failure_reason);
545 
549  const StringPiece& filter_id,
550  const StringPiece& name,
551  ResourceNamer* full_name);
552 
556  OutputResourcePtr CreateOutputResourceWithUnmappedUrl(
557  const GoogleUrl& unmapped_gurl, const StringPiece& filter_id,
558  const StringPiece& name, OutputResourceKind kind,
559  GoogleString* failure_reason);
560 
564  const StringPiece& mapped_path, const StringPiece& unmapped_path,
565  const StringPiece& filter_id, const StringPiece& name,
566  OutputResourceKind kind, GoogleString* failure_reason) {
567  return CreateOutputResourceWithPath(mapped_path, unmapped_path,
568  decoded_base_url_.AllExceptLeaf(),
569  filter_id, name, kind, failure_reason);
570  }
571 
// Convenience overload: passes the single `path` as the mapped path, the
// unmapped path and the base URL of the seven-argument
// CreateOutputResourceWithPath(), forwarding all other arguments unchanged.
574  OutputResourcePtr CreateOutputResourceWithPath(
575  const StringPiece& path, const StringPiece& filter_id,
576  const StringPiece& name, OutputResourceKind kind,
577  GoogleString* failure_reason) {
578  return CreateOutputResourceWithPath(path, path, path, filter_id, name,
579  kind, failure_reason);
580  }
581 
587  ResourcePtr CreateInputResource(const GoogleUrl& input_url,
588  bool* is_authorized);
589 
605  ResourcePtr CreateInputResource(
606  const GoogleUrl& input_url,
607  InlineAuthorizationPolicy inline_authorization_policy,
608  IntendedFor intended_for,
609  bool* is_authorized);
610 
615  const StringPiece& absolute_url);
616 
620  bool IsResourceUrlClaimed(const GoogleUrl& url) const;
621 
626  bool MatchesBaseUrl(const GoogleUrl& input_url) const;
627 
633  bool MayRewriteUrl(const GoogleUrl& domain_url,
634  const GoogleUrl& input_url,
635  InlineAuthorizationPolicy inline_authorization_policy,
636  IntendedFor intended_for,
637  bool* is_authorized_domain) const;
638 
642  const GoogleUrl& base_url() const { return base_url_; }
643 
645  StringPiece fetch_url() const { return fetch_url_; }
646 
// Accessors for decoded_base_url_: the GoogleUrl itself by const reference,
// and its Spec() as a StringPiece.
650  const GoogleUrl& decoded_base_url() const { return decoded_base_url_; }
651  StringPiece decoded_base() const { return decoded_base_url_.Spec(); }
652 
// True when google_url() has the "https" scheme.
// NOTE(review): google_url() is not declared in this listing; presumably it
// is inherited from HtmlParse -- confirm.
654  bool IsHttps() const { return google_url().SchemeIs("https"); }
655 
656  const UrlSegmentEncoder* default_encoder() const { return &default_encoder_; }
657 
// Finds a filter with the given ID, or returns NULL if none found.
659  RewriteFilter* FindFilter(const StringPiece& id) const;
660 
// Reports the refs_before_base_ flag.
662  bool refs_before_base() { return refs_before_base_; }
663 
// Sets refs_before_base_ to true. The setter takes no argument, so this
// accessor pair cannot clear the flag again.
668  void set_refs_before_base() { refs_before_base_ = true; }
669 
// Getter returns a StringPiece over the containing_charset_ string member.
674  StringPiece containing_charset() { return containing_charset_; }
// Setter copies the characters of `charset` into containing_charset_ via
// CopyToString(), so the argument need not outlive the call.
675  void set_containing_charset(const StringPiece charset) {
676  charset.CopyToString(&containing_charset_);
677  }
678 
680  HtmlResourceSlotPtr GetSlot(const ResourcePtr& resource,
681  HtmlElement* elt,
682  HtmlElement::Attribute* attr);
683 
688  bool InitiateRewrite(RewriteContext* rewrite_context)
689  LOCKS_EXCLUDED(rewrite_mutex());
690  void InitiateFetch(RewriteContext* rewrite_context);
691 
702  void RewriteComplete(RewriteContext* rewrite_context, bool permit_render);
703 
707  void ReportSlowRewrites(int num);
708 
713  void Cleanup();
714 
719  void AddUserReference();
720 
// Debugging routines to print out data about the driver.
// NOTE(review): show_detached_contexts presumably controls whether detached
// rewrite contexts are included in the output -- confirm in the .cc file.
722  GoogleString ToString(bool show_detached_contexts);
723  void PrintState(bool show_detached_contexts);
724  void PrintStateToErrorLog(bool show_detached_contexts);
725 
728  void WaitForCompletion();
729 
736  void WaitForShutDown();
737 
741  void BoundedWaitFor(WaitMode mode, int64 timeout_ms)
742  LOCKS_EXCLUDED(rewrite_mutex());
743 
752  fully_rewrite_on_flush_ = x;
753  }
754 
// Returns the fully_rewrite_on_flush_ flag, which
// WaitForPendingAsyncEvents() also reads.
756  bool fully_rewrite_on_flush() const {
757  return fully_rewrite_on_flush_;
758  }
759 
764  fast_blocking_rewrite_ = x;
765  }
766 
// Returns the fast_blocking_rewrite_ flag, which
// WaitForPendingAsyncEvents() also reads.
767  bool fast_blocking_rewrite() const {
768  return fast_blocking_rewrite_;
769  }
770 
773  void EnableBlockingRewrite(RequestHeaders* request_headers);
774 
// Sets the externally_managed_ flag. No getter for it appears in this
// listing.
781  void set_externally_managed(bool x) { externally_managed_ = x; }
782 
786  void DetachFetch();
787 
790  void DetachedFetchComplete();
791 
795  void FetchComplete();
796 
802  void DeleteRewriteContext(RewriteContext* rewrite_context);
803 
// Shorthand for options()->rewrite_deadline_ms(). options() is dereferenced
// without a null check, so options must already be set.
804  int rewrite_deadline_ms() { return options()->rewrite_deadline_ms(); }
805 
811  max_page_processing_delay_ms_ = x;
812  }
// Returns max_page_processing_delay_ms_ (an int; the name indicates
// milliseconds).
813  int max_page_processing_delay_ms() { return max_page_processing_delay_ms_; }
814 
// Setter/getter pair for device_type_, a UserAgentMatcher::DeviceType value.
816  void set_device_type(UserAgentMatcher::DeviceType x) { device_type_ = x; }
817  UserAgentMatcher::DeviceType device_type() const { return device_type_; }
818 
824  RewriteContext* RegisterForPartitionKey(const GoogleString& partition_key,
825  RewriteContext* candidate);
826 
832  const GoogleString& partition_key, RewriteContext* candidate);
833 
// RequestFlush() only records the request by setting flush_requested_; it
// does not perform a flush itself. flush_requested() reports the flag.
836  void RequestFlush() { flush_requested_ = true; }
837  bool flush_requested() const { return flush_requested_; }
838 
851 
855  void ExecuteFlushIfRequestedAsync(Function* callback);
856 
865  virtual void Flush();
866 
870  void FlushAsync(Function* done);
871 
// Queues up a task to run on the (high-priority) rewrite thread.
873  void AddRewriteTask(Function* task);
874 
877  void AddLowPriorityRewriteTask(Function* task);
878 
// Plain accessors for the three QueuedWorkerPool::Sequence pointers held by
// the driver: html_worker_, rewrite_worker_ and
// low_priority_rewrite_worker_.
879  QueuedWorkerPool::Sequence* html_worker() { return html_worker_; }
880  QueuedWorkerPool::Sequence* rewrite_worker() { return rewrite_worker_; }
881  QueuedWorkerPool::Sequence* low_priority_rewrite_worker() {
882  return low_priority_rewrite_worker_;
883  }
884 
// Returns the scheduler_ pointer; rewrite_mutex() uses the same scheduler's
// mutex().
885  Scheduler* scheduler() { return scheduler_; }
886 
// Return the raw pointers held by the domain_rewriter_ and url_trim_filter_
// scoped_ptr members via get(); the members keep holding the objects.
889  DomainRewriteFilter* domain_rewriter() { return domain_rewriter_.get(); }
890  UrlLeftTrimFilter* url_trim_filter() { return url_trim_filter_.get(); }
891 
899  CssResolutionStatus ResolveCssUrls(const GoogleUrl& input_css_base,
900  const StringPiece& output_css_base,
901  const StringPiece& contents,
902  Writer* writer,
903  MessageHandler* handler);
904 
912  bool ShouldAbsolutifyUrl(const GoogleUrl& input_base,
913  const GoogleUrl& output_base,
914  bool* proxy_mode) const;
915 
924  AbstractPropertyPage* page,
925  StringPiece property_name,
926  StringPiece property_value);
927 
930  PropertyPage* property_page() const;
// Returns the fallback_property_page_ raw pointer as stored.
935  FallbackPropertyPage* fallback_property_page() const {
936  return fallback_property_page_;
937  }
939  void set_property_page(PropertyPage* page);
941  void set_fallback_property_page(FallbackPropertyPage* page);
943  void set_unowned_fallback_property_page(FallbackPropertyPage* page);
944 
946  const CriticalLineInfo* critical_line_info() const;
947 
950  void set_critical_line_info(CriticalLineInfo* critical_line_info);
951 
952  CriticalKeys* beacon_critical_line_info() const;
953  void set_beacon_critical_line_info(CriticalKeys* beacon_critical_line_info);
954 
955  const SplitHtmlConfig* split_html_config();
956 
957  CriticalCssResult* critical_css_result() const;
960  void set_critical_css_result(CriticalCssResult* critical_css_rules);
961 
967 
970  return critical_images_info_.get();
971  }
972 
978  return critical_selector_info_.get();
979  }
980 
985  critical_selector_info_.reset(info);
986  }
987 
993  critical_images_info_.reset(critical_images_info);
994  }
995 
998  bool CriticalSelectorsEnabled() const;
999 
1003  return (options()->Enabled(RewriteOptions::kFlattenCssImports) ||
1004  (!options()->Forbidden(RewriteOptions::kFlattenCssImports) &&
1006  options()->Enabled(RewriteOptions::kComputeCriticalCss))));
1007  }
1008 
// Returns the num_inline_preview_images_ counter.
1012  int num_inline_preview_images() const { return num_inline_preview_images_; }
1013 
1016 
1020  return num_flushed_early_pagespeed_resources_;
1021  }
1022 
1026  ++num_flushed_early_pagespeed_resources_;
1027  }
1028 
1032 
1035 
1039 
// Trivial setter/getter pairs for the driver's flush-state booleans. Each
// setter only assigns the corresponding member; nothing else is triggered
// here. Note that flushed_cached_html() is the only getter in this group
// that is not declared const.
1040  void set_flushed_cached_html(bool x) { flushed_cached_html_ = x; }
1041  bool flushed_cached_html() { return flushed_cached_html_; }
1042 
1043  void set_flushing_cached_html(bool x) { flushing_cached_html_ = x; }
1044  bool flushing_cached_html() const { return flushing_cached_html_; }
1045 
1046  void set_flushed_early(bool x) { flushed_early_ = x; }
1047  bool flushed_early() const { return flushed_early_; }
1048 
1049  void set_flushing_early(bool x) { flushing_early_ = x; }
1050  bool flushing_early() const { return flushing_early_; }
1051 
1052  void set_is_lazyload_script_flushed(bool x) {
1053  is_lazyload_script_flushed_ = x;
1054  }
1055  bool is_lazyload_script_flushed() const {
1056  return is_lazyload_script_flushed_; }
1057 
// This method is not thread-safe. Call it only from the html parser thread.
1059  FlushEarlyInfo* flush_early_info();
1060 
1061  FlushEarlyRenderInfo* flush_early_render_info() const;
1062 
1066  FlushEarlyRenderInfo* flush_early_render_info);
1067 
// True when the RewriteOptions::kDebug filter is enabled in options().
// options() is dereferenced without a null check.
1070  bool DebugMode() const { return options()->Enabled(RewriteOptions::kDebug); }
1071 
1077  void InsertDebugComment(StringPiece unescaped_message, HtmlNode* node);
1078  void InsertDebugComments(
1079  const protobuf::RepeatedPtrField<GoogleString>& unescaped_messages,
1080  HtmlElement* element);
1081  void InsertUnauthorizedDomainDebugComment(StringPiece url,
1082  HtmlElement* element);
1083 
1085  static GoogleString GenerateUnauthorizedDomainDebugComment(
1086  const GoogleUrl& gurl);
1087 
1090  void SaveOriginalHeaders(const ResponseHeaders& response_headers);
1091 
1094  AbstractLogRecord* log_record();
1095 
// Returns the dom_stats_filter_ raw pointer as stored.
1096  DomStatsFilter* dom_stats_filter() const {
1097  return dom_stats_filter_;
1098  }
1099 
// Returns the can_rewrite_resources_ flag.
1102  bool can_rewrite_resources() const { return can_rewrite_resources_; }
1103 
// Determine whether this driver is nested inside another.
1105  bool is_nested() const { return is_nested_; }
1106 
1110  bool MetadataRequested(const RequestHeaders& request_headers) const;
1111 
// Did the driver attempt to distribute the fetch?
1113  bool tried_to_distribute_fetch() const { return tried_to_distribute_fetch_; }
1114 
1127  bool Write(const ResourceVector& inputs,
1128  const StringPiece& contents,
1129  const ContentType* type,
1130  StringPiece charset,
1131  OutputResource* output);
1132 
// Setter/getter pair for the defer_instrumentation_script_ flag.
1133  void set_defer_instrumentation_script(bool x) {
1134  defer_instrumentation_script_ = x;
1135  }
1136  bool defer_instrumentation_script() const {
1137  return defer_instrumentation_script_;
1138  }
1139 
1143  ScopedMutex lock(rewrite_mutex());
1144  num_initiated_rewrites_ = x;
1145  }
// Reads num_initiated_rewrites_ while holding rewrite_mutex() through a
// ScopedMutex, matching the GUARDED_BY(rewrite_mutex()) annotation on the
// member. The caller must therefore not already hold that mutex.
// NOTE(review): the last sentence assumes the mutex is non-reentrant --
// confirm.
1146  int64 num_initiated_rewrites() const {
1147  ScopedMutex lock(rewrite_mutex());
1148  return num_initiated_rewrites_;
1149  }
1152  ScopedMutex lock(rewrite_mutex());
1153  num_detached_rewrites_ = x;
1154  }
// Reads num_detached_rewrites_ while holding rewrite_mutex() through a
// ScopedMutex, matching the GUARDED_BY(rewrite_mutex()) annotation on the
// member.
1155  int64 num_detached_rewrites() const {
1156  ScopedMutex lock(rewrite_mutex());
1157  return num_detached_rewrites_;
1158  }
1159 
// Setter copies the characters of `x` into pagespeed_query_params_ via
// CopyToString(); the getter returns a StringPiece over that member string.
1160  void set_pagespeed_query_params(StringPiece x) {
1161  x.CopyToString(&pagespeed_query_params_);
1162  }
1163  StringPiece pagespeed_query_params() const {
1164  return pagespeed_query_params_;
1165  }
1166 
// Setter copies the characters of `x` into pagespeed_option_cookies_ via
// CopyToString(); the getter returns a StringPiece over that member string.
1167  void set_pagespeed_option_cookies(StringPiece x) {
1168  x.CopyToString(&pagespeed_option_cookies_);
1169  }
1170  StringPiece pagespeed_option_cookies() const {
1171  return pagespeed_option_cookies_;
1172  }
1173 
1176  const GoogleString& CacheFragment() const;
1177 
1184  bool SetOrClearPageSpeedOptionCookies(const GoogleUrl& gurl,
1185  ResponseHeaders* response_headers);
1186 
1189  bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;
1190 
1191  protected:
1192  virtual void DetermineEnabledFiltersImpl();
1193 
1194  private:
1195  friend class DistributedRewriteContextTest;
1196  friend class RewriteContext;
1197  friend class RewriteDriverTest;
1198  friend class RewriteTestBase;
1199  friend class ServerContextTest;
1200 
1201  typedef std::map<GoogleString, RewriteFilter*> StringFilterMap;
1202 
1204  bool ShouldDistributeFetch(const StringPiece& filter_id);
1205 
1217  bool DistributeFetch(const StringPiece& url, const StringPiece& filter_id,
1218  AsyncFetch* async_fetch);
1219 
1224  void CheckForCompletionAsync(WaitMode wait_mode, int64 timeout_ms,
1225  Function* done)
1226  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1227 
1231  void TryCheckForCompletion(WaitMode wait_mode, int64 end_time_ms,
1232  Function* done)
1233  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1234 
1236  bool IsDone(WaitMode wait_mode, bool deadline_reached)
1237  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1238 
// Returns true when wait_mode is kWaitForShutDown, or when
// fully_rewrite_on_flush_ is set and fast_blocking_rewrite_ is not.
// NOTE(review): presumably a true result tells the caller to also wait for
// outstanding async events -- confirm against the call sites.
1241  bool WaitForPendingAsyncEvents(WaitMode wait_mode) {
1242  return wait_mode == kWaitForShutDown ||
1243  (fully_rewrite_on_flush_ && !fast_blocking_rewrite_);
1244  }
1245 
1249  void FlushAsyncDone(int num_rewrites, Function* callback);
1250 
1255  int64 ComputeCurrentFlushWindowRewriteDelayMs();
1256 
1258  void QueueFlushAsyncDone(int num_rewrites, Function* callback);
1259 
1262  void QueueFinishParseAfterFlush(Function* user_callback);
1263  void FinishParseAfterFlush(Function* user_callback);
1264 
1265  bool RewritesComplete() const EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1266 
1269  void SetBaseUrlIfUnset(const StringPiece& new_base);
1270 
1273  void SetBaseUrlForFetch(const StringPiece& url);
1274 
1277  void SetDecodedUrlFromBase();
1278 
// Returns the mutex that the GUARDED_BY / EXCLUSIVE_LOCKS_REQUIRED
// annotations in this class name: it is simply scheduler_->mutex(). The
// LOCK_RETURNED annotation states that same equivalence.
1280  AbstractMutex* rewrite_mutex() const LOCK_RETURNED(scheduler_->mutex()) {
1281  return scheduler_->mutex();
1282  }
1283 
1285  virtual void ParseTextInternal(const char* content, int size);
1286 
1288  bool ShouldSkipParsing();
1289 
1291  int SignatureLength() const;
1292 
1293  friend class ScanFilter;
1294 
1298  void RegisterRewriteFilter(RewriteFilter* filter);
1299 
1304  void EnableRewriteFilter(const char* id);
1305 
1311  ResourcePtr CreateInputResourceUnchecked(const GoogleUrl& gurl,
1312  bool is_authorized_domain);
1313 
1314  void AddPreRenderFilters();
1315  void AddPostRenderFilters();
1316 
1318  bool DecodeOutputResourceNameHelper(const GoogleUrl& url,
1319  const RewriteOptions* options_to_use,
1320  const UrlNamer* url_namer,
1321  ResourceNamer* name_out,
1322  OutputResourceKind* kind_out,
1323  RewriteFilter** filter_out,
1324  GoogleString* url_base,
1325  StringVector* urls) const;
1326 
1336  void WriteDomCohortIntoPropertyCache();
1337 
1339  CacheUrlAsyncFetcher* CreateCustomCacheFetcher(UrlAsyncFetcher* base_fetcher);
1340 
1347  void PossiblyPurgeCachedResponseAndReleaseDriver();
1348 
1350  void LogStats();
1351 
1367  bool PrepareShouldSignal() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1368  void SignalIfRequired(bool result_of_prepare_should_signal)
1369  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1370 
1382  bool base_was_set_;
1383 
1388  bool refs_before_base_;
1389 
1391  GoogleString containing_charset_;
1392 
1395  void PopulateRequestContext();
1396 
1397  bool filters_added_;
1398  bool externally_managed_;
1399 
// Reference categories used as the second template argument of
// CategorizedRefcount<RewriteDriver, RefCategory> (see ref_counts_).
// kNumRefCategories is the last enumerator, so its value equals the number
// of real categories listed before it.
// NOTE(review): the per-category comments of the original header are not
// present in this listing; consult the source for each category's meaning.
1408  enum RefCategory {
1409  kRefUser,
1410  kRefParsing,
1411 
1415  kRefPendingRewrites,
1416 
1420  kRefDetachedRewrites,
1421 
1429  kRefDeletingRewrites,
1430 
1432  kRefFetchUserFacing,
1433 
1435  kRefFetchBackground,
1436 
1441  kRefAsyncEvents,
1442 
1443  kNumRefCategories
1444  };
1445 
1446  friend class CategorizedRefcount<RewriteDriver, RefCategory>;
1447 
1449  CategorizedRefcount<RewriteDriver, RefCategory> ref_counts_;
1450 
1452  void LastRefRemoved();
1453  StringPiece RefCategoryName(RefCategory cat);
1454 
1457  void DropReference(RefCategory cat);
1458 
1461  bool release_driver_;
1462 
1465  bool parsing_ GUARDED_BY(rewrite_mutex());
1466 
1470  WaitMode waiting_ GUARDED_BY(rewrite_mutex());
1471 
1473  bool waiting_deadline_reached_ GUARDED_BY(rewrite_mutex());
1474 
1479  bool fully_rewrite_on_flush_;
1480 
1483  bool fast_blocking_rewrite_;
1484 
1485  bool flush_requested_;
1486  bool flush_occurred_;
1487 
1489  bool flushed_cached_html_;
1490 
1492  bool flushing_cached_html_;
1493 
1496  bool flushed_early_;
1500  bool flushing_early_;
1501 
1504  bool is_lazyload_script_flushed_;
1505 
1509  bool write_property_cache_dom_cohort_;
1510 
1513  GoogleUrl base_url_;
1514 
1518  GoogleUrl decoded_base_url_;
1519 
1522  GoogleString fetch_url_;
1523 
1524  GoogleString user_agent_;
1525 
1526  LazyBool should_skip_parsing_;
1527 
1528  StringFilterMap resource_filter_map_;
1529 
1530  ResponseHeaders* response_headers_;
1531 
1534  scoped_ptr<const RequestHeaders> request_headers_;
1535 
1536  int status_code_;
1537 
1540  typedef std::vector<RewriteContext*> RewriteContextVector;
1541  RewriteContextVector rewrites_;
1542 
1545  int max_page_processing_delay_ms_;
1546 
1547  typedef std::set<RewriteContext*> RewriteContextSet;
1548 
1553  RewriteContextSet initiated_rewrites_ GUARDED_BY(rewrite_mutex());
1554 
1556  int64 num_initiated_rewrites_ GUARDED_BY(rewrite_mutex());
1557 
1565  int64 num_detached_rewrites_ GUARDED_BY(rewrite_mutex());
1566 
1574  RewriteContextSet detached_rewrites_ GUARDED_BY(rewrite_mutex());
1575 
1577  int possibly_quick_rewrites_ GUARDED_BY(rewrite_mutex());
1578 
1581  RewriteContextVector fetch_rewrites_;
1582 
1585  FileSystem* file_system_;
1586  ServerContext* server_context_;
1587  Scheduler* scheduler_;
1588  UrlAsyncFetcher* default_url_async_fetcher_;
1589 
1593  UrlAsyncFetcher* url_async_fetcher_;
1594 
1598  UrlAsyncFetcher* distributed_async_fetcher_;
1599 
1602  std::vector<UrlAsyncFetcher*> owned_url_async_fetchers_;
1603 
1604  DomStatsFilter* dom_stats_filter_;
1605  scoped_ptr<HtmlWriterFilter> html_writer_filter_;
1606 
1607  ScanFilter scan_filter_;
1608  scoped_ptr<DomainRewriteFilter> domain_rewriter_;
1609  scoped_ptr<UrlLeftTrimFilter> url_trim_filter_;
1610 
1613  typedef std::map<GoogleString, RewriteContext*> PrimaryRewriteContextMap;
1614  PrimaryRewriteContextMap primary_rewrite_context_map_;
1615 
1616  HtmlResourceSlotSet slots_;
1617 
1618  scoped_ptr<RewriteOptions> options_;
1619 
1620  RewriteDriverPool* controlling_pool_;
1621 
1623  scoped_ptr<CacheUrlAsyncFetcher::AsyncOpHooks>
1624  cache_url_async_fetcher_async_op_hooks_;
1625 
1627  UrlSegmentEncoder default_encoder_;
1628 
1630  FilterList early_pre_render_filters_;
1632  FilterList pre_render_filters_;
1633 
1635  std::vector<ResourceUrlClaimant*> resource_claimants_;
1636 
1640  FilterVector filters_to_delete_;
1641 
1642  QueuedWorkerPool::Sequence* html_worker_;
1643  QueuedWorkerPool::Sequence* rewrite_worker_;
1644  QueuedWorkerPool::Sequence* low_priority_rewrite_worker_;
1645 
1646  Writer* writer_;
1647 
1650  FallbackPropertyPage* fallback_property_page_;
1651 
1653  bool owns_property_page_;
1654 
1656  UserAgentMatcher::DeviceType device_type_;
1657 
1658  scoped_ptr<CriticalLineInfo> critical_line_info_;
1659  scoped_ptr<CriticalKeys> beacon_critical_line_info_;
1660 
1661  scoped_ptr<SplitHtmlConfig> split_html_config_;
1662 
1665  scoped_ptr<CriticalImagesInfo> critical_images_info_;
1666  scoped_ptr<CriticalSelectorInfo> critical_selector_info_;
1667 
1668  scoped_ptr<CriticalCssResult> critical_css_result_;
1669 
1671  bool xhtml_mimetype_computed_;
1672  XhtmlStatus xhtml_status_ : 8;
1673 
1676  int num_inline_preview_images_;
1677 
1679  int num_flushed_early_pagespeed_resources_;
1680 
1682  int num_bytes_in_;
1683 
1684  DebugFilter* debug_filter_;
1685 
1686  scoped_ptr<FlushEarlyInfo> flush_early_info_;
1687  scoped_ptr<FlushEarlyRenderInfo> flush_early_render_info_;
1688 
1689  bool can_rewrite_resources_;
1690  bool is_nested_;
1691 
1694  RequestContextPtr request_context_;
1695 
1697  int64 start_time_ms_;
1698 
1699  scoped_ptr<RequestProperties> request_properties_;
1700 
1704  static int initialized_count_;
1705 
1708  bool tried_to_distribute_fetch_;
1709 
1712  bool defer_instrumentation_script_;
1713 
1715  DownstreamCachePurger downstream_cache_purger_;
1716 
1718  GoogleString pagespeed_query_params_;
1719 
1721  GoogleString pagespeed_option_cookies_;
1722 
1723  DISALLOW_COPY_AND_ASSIGN(RewriteDriver);
1724 };
1725 
1729  public:
1730  virtual ~OptionsAwareHTTPCacheCallback();
1731  virtual bool IsCacheValid(const GoogleString& key,
1732  const ResponseHeaders& headers);
1733  virtual int64 OverrideCacheTtlMs(const GoogleString& key);
1734  virtual ResponseHeaders::VaryOption RespectVaryOnResources() const;
1735 
1739  static bool IsCacheValid(const GoogleString& key,
1740  const RewriteOptions& rewrite_options,
1741  const RequestContextPtr& request_ctx,
1742  const ResponseHeaders& headers);
1743 
1744  protected:
1748  const RewriteOptions* rewrite_options,
1749  const RequestContextPtr& request_ctx);
1750 
1751  private:
1752  const RewriteOptions* rewrite_options_;
1753 
1754  DISALLOW_COPY_AND_ASSIGN(OptionsAwareHTTPCacheCallback);
1755 };
1756 
1757 }
1758 
1759 #endif
virtual ResponseHeaders::VaryOption RespectVaryOnResources() const
OutputResourcePtr DecodeOutputResource(const GoogleUrl &url, RewriteFilter **filter) const
Definition: rewrite_driver.h:113
static const char kDomCohort[]
Definition: rewrite_driver.h:149
void EnableBlockingRewrite(RequestHeaders *request_headers)
void set_max_page_processing_delay_ms(int x)
Definition: rewrite_driver.h:810
void set_flush_early_render_info(FlushEarlyRenderInfo *flush_early_render_info)
void PopulateResourceNamer(const StringPiece &filter_id, const StringPiece &name, ResourceNamer *full_name)
const ResponseHeaders * response_headers()
Definition: rewrite_driver.h:247
OutputResourcePtr CreateOutputResourceWithPath(const StringPiece &mapped_path, const StringPiece &unmapped_path, const StringPiece &base_url, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
void RewriteComplete(RewriteContext *rewrite_context, bool permit_render)
void set_critical_line_info(CriticalLineInfo *critical_line_info)
DomainRewriteFilter * domain_rewriter()
Definition: rewrite_driver.h:889
Definition: critical_selector_finder.h:43
bool Write(const ResourceVector &inputs, const StringPiece &contents, const ContentType *type, StringPiece charset, OutputResource *output)
bool DecodeUrlGivenOptions(const GoogleUrl &url, const RewriteOptions *options, const UrlNamer *url_namer, StringVector *decoded_urls) const
void AppendRewriteFilter(RewriteFilter *filter)
bool MayCacheExtendCss() const
bool MetadataRequested(const RequestHeaders &request_headers) const
void ClearRequestProperties()
Reinitializes request_properties_, clearing any cached values.
const GoogleUrl & base_url() const
Definition: rewrite_driver.h:642
bool FetchOutputResource(const OutputResourcePtr &output_resource, RewriteFilter *filter, AsyncFetch *async_fetch)
virtual bool IsCacheValid(const GoogleString &key, const ResponseHeaders &headers)
void AddOwnedPostRenderFilter(HtmlFilter *filter)
Adds a filter to the end of the post-render chain, taking ownership.
void set_options_for_pool(RewriteDriverPool *pool, RewriteOptions *options)
Definition: rewrite_driver.h:469
RewriteFilter * FindFilter(const StringPiece &id) const
Finds a filter with the given ID, or returns NULL if none found.
static const char kSubresourcesPropertyName[]
Flush Subresources Info associated with the HTML page.
Definition: rewrite_driver.h:159
void set_response_headers_ptr(ResponseHeaders *headers)
Definition: rewrite_driver.h:254
GoogleString ToString(bool show_detached_contexts)
Debugging routines to print out data about the driver.
void SaveOriginalHeaders(const ResponseHeaders &response_headers)
void InfoAt(const RewriteContext *context, const char *msg,...) INSTAWEB_PRINTF_FORMAT(3, 4)
bool CriticalSelectorsEnabled() const
bool tried_to_distribute_fetch() const
Did the driver attempt to distribute the fetch?
Definition: rewrite_driver.h:1113
CssResolutionStatus
Status return-code for ResolveCssUrls.
Definition: rewrite_driver.h:101
CriticalSelectorInfo * critical_selector_info()
Definition: rewrite_driver.h:977
FallbackPropertyPage * fallback_property_page() const
Definition: rewrite_driver.h:935
StringPiece containing_charset()
Definition: rewrite_driver.h:674
ResourcePtr CreateInputResourceAbsoluteUncheckedForTestsOnly(const StringPiece &absolute_url)
FlushEarlyInfo * flush_early_info()
This method is not thread-safe. Call it only from the html parser thread.
void AddRewriteTask(Function *task)
Queues up a task to run on the (high-priority) rewrite thread.
static void InitStats(Statistics *statistics)
Initialize statistics for all filters that need it.
void SetSessionFetcher(UrlAsyncFetcher *f)
bool DebugMode() const
Definition: rewrite_driver.h:1070
Definition: url_left_trim_filter.h:47
bool FetchResource(const StringPiece &url, AsyncFetch *fetch)
void set_fully_rewrite_on_flush(bool x)
Definition: rewrite_driver.h:751
bool is_nested() const
Determine whether this driver is nested inside another.
Definition: rewrite_driver.h:1105
void increment_num_inline_preview_images()
We expect this method to be called on the HTML parser thread.
bool LookupMetadataForOutputResource(StringPiece url, GoogleString *error_out, RewriteContext::CacheLookupResultCallback *callback)
void set_critical_images_info(CriticalImagesInfo *critical_images_info)
Definition: rewrite_driver.h:992
void FlushAsync(Function *done)
void FinishParseAsync(Function *callback)
void AddOwnedEarlyPreRenderFilter(HtmlFilter *filter)
static void Initialize()
Initialize statics. Initialize/Terminate calls must be paired.
const CriticalLineInfo * critical_line_info() const
Used by ImageRewriteFilter for identifying critical images.
bool IsResourceUrlClaimed(const GoogleUrl &url) const
RequestTrace * trace_context()
void set_custom_options(RewriteOptions *options)
Takes ownership of 'options'.
Definition: rewrite_driver.h:463
void PrintStateToErrorLog(bool show_detached_contexts)
For logs.
XhtmlStatus MimeTypeXhtmlStatus()
WaitMode
Mode for BoundedWaitForCompletion.
Definition: rewrite_driver.h:108
bool MatchesBaseUrl(const GoogleUrl &input_url) const
bool FlattenCssImportsEnabled() const
Definition: rewrite_driver.h:1002
Callback2< const GoogleUrl &, bool * > ResourceUrlClaimant
Definition: resource.h:340
Definition: rewrite_driver_pool.h:34
void SetRequestHeaders(const RequestHeaders &headers)
static GoogleString DeadlineExceededMessage(StringPiece filter_name)
Formats a "deadline exceeded" message for a given filter.
bool DecodeOutputResourceName(const GoogleUrl &url, const RewriteOptions *options_to_use, const UrlNamer *url_namer, ResourceNamer *name_out, OutputResourceKind *kind_out, RewriteFilter **filter_out) const
void FetchInPlaceResource(const GoogleUrl &gurl, bool proxy_mode, AsyncFetch *async_fetch)
static const char kStatusCodePropertyName[]
Status codes of previous responses.
Definition: rewrite_driver.h:161
void set_num_detached_rewrites(int64 x)
Sets the num_detached_rewrites_. This should only be called from test code.
Definition: rewrite_driver.h:1151
void DeleteRewriteContext(RewriteContext *rewrite_context)
OutputResourcePtr CreateOutputResourceWithMappedPath(const StringPiece &mapped_path, const StringPiece &unmapped_path, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
Definition: rewrite_driver.h:563
void set_fast_blocking_rewrite(bool x)
Definition: rewrite_driver.h:763
void set_unowned_fallback_property_page(FallbackPropertyPage *page)
Does not take the ownership of the page.
ResponseHeaders * mutable_response_headers()
Definition: rewrite_driver.h:237
CssResolutionStatus ResolveCssUrls(const GoogleUrl &input_css_base, const StringPiece &output_css_base, const StringPiece &contents, Writer *writer, MessageHandler *handler)
bool using_spdy() const
Returns true if the request we're rewriting was made using SPDY.
Definition: rewrite_driver.h:215
XhtmlStatus
Definition: rewrite_driver.h:128
void set_fallback_property_page(FallbackPropertyPage *page)
Takes ownership of page.
void set_critical_selector_info(CriticalSelectorInfo *info)
Definition: rewrite_driver.h:984
RewriteDriver * Clone()
bool fully_rewrite_on_flush() const
Returns whether this response has a blocking rewrite.
Definition: rewrite_driver.h:756
CacheUrlAsyncFetcher * CreateCacheOnlyFetcher()
Returns a cache fetcher that does not fall back to an actual fetcher.
ResourcePtr CreateInputResource(const GoogleUrl &input_url, bool *is_authorized)
void DeregisterForPartitionKey(const GoogleString &partition_key, RewriteContext *candidate)
static const char kLastRequestTimestamp[]
Definition: rewrite_driver.h:155
void AppendOwnedPreRenderFilter(HtmlFilter *filter)
Adds a filter to the end of the pre-render chain, taking ownership.
bool refs_before_base()
Returns refs_before_base.
Definition: rewrite_driver.h:662
PropertyPage * property_page() const
int num_flushed_early_pagespeed_resources() const
Definition: rewrite_driver.h:1019
bool MayRewriteUrl(const GoogleUrl &domain_url, const GoogleUrl &input_url, InlineAuthorizationPolicy inline_authorization_policy, IntendedFor intended_for, bool *is_authorized_domain) const
bool can_rewrite_resources() const
Definition: rewrite_driver.h:1102
Definition: cache_url_async_fetcher.h:57
Definition: split_html_config.h:50
static GoogleString GenerateUnauthorizedDomainDebugComment(const GoogleUrl &gurl)
Generates an unauthorized domain debug comment. Public for unit tests.
StringPiece fetch_url() const
The URL that was requested if FetchResource was called.
Definition: rewrite_driver.h:645
void decrement_async_events_count()
Decrements a reference count bumped up by increment_async_events_count()
void BoundedWaitFor(WaitMode mode, int64 timeout_ms) LOCKS_EXCLUDED(rewrite_mutex())
void AddUnownedPostRenderFilter(HtmlFilter *filter)
Same, without taking ownership.
bool IsHttps() const
Quick way to tell if the document url is https (i.e. was fetched via https).
Definition: rewrite_driver.h:654
void ReportSlowRewrites(int num)
void PrependRewriteFilter(RewriteFilter *filter)
HtmlResourceSlotPtr GetSlot(const ResourcePtr &resource, HtmlElement *elt, HtmlElement::Attribute *attr)
Establishes a HtmlElement slot for rewriting.
Definition: rewrite_driver.h:98
const GoogleUrl & decoded_base_url() const
Definition: rewrite_driver.h:650
bool SetOrClearPageSpeedOptionCookies(const GoogleUrl &gurl, ResponseHeaders *response_headers)
void set_num_initiated_rewrites(int64 x)
Definition: rewrite_driver.h:1142
void set_property_page(PropertyPage *page)
Takes ownership of page.
void PrependOwnedPreRenderFilter(HtmlFilter *filter)
Adds a filter to the beginning of the pre-render chain, taking ownership.
Definition: server_context.h:101
virtual bool StartParseId(const StringPiece &url, const StringPiece &id, const ContentType &content_type)
RewriteDriverPool * controlling_pool()
Pool in which this driver can be recycled. May be NULL.
Definition: rewrite_driver.h:475
Definition: rewrite_context.h:144
void set_externally_managed(bool x)
Definition: rewrite_driver.h:781
OutputResourcePtr CreateOutputResourceWithUnmappedUrl(const GoogleUrl &unmapped_gurl, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
bool InitiateRewrite(RewriteContext *rewrite_context) LOCKS_EXCLUDED(rewrite_mutex())
void PrintState(bool show_detached_contexts)
For debugging.
int num_inline_preview_images() const
Definition: rewrite_driver.h:1012
void SetWriter(Writer *writer)
bool ShouldAbsolutifyUrl(const GoogleUrl &input_base, const GoogleUrl &output_base, bool *proxy_mode) const
void AddResourceUrlClaimant(ResourceUrlClaimant *claimant)
void AddLowPriorityRewriteTask(Function *task)
OptionsAwareHTTPCacheCallback(const RewriteOptions *rewrite_options, const RequestContextPtr &request_ctx)
void TracePrintf(const char *fmt,...)
InlineAuthorizationPolicy
See CreateInputResource.
Definition: rewrite_driver.h:135
const GoogleString & CacheFragment() const
wait for everything to complete (up to deadline)
Definition: rewrite_driver.h:110
static const char kBeaconCohort[]
The cohort for properties that are written by the beacon handler.
Definition: rewrite_driver.h:151
bool Decode(StringPiece leaf, ResourceNamer *resource_namer) const
Definition: http_cache.h:87
virtual int64 OverrideCacheTtlMs(const GoogleString &key)
OutputResourcePtr CreateOutputResourceFromResource(const StringPiece &filter_id, const UrlSegmentEncoder *encoder, const ResourceContext *data, const ResourcePtr &input_resource, OutputResourceKind kind, GoogleString *failure_reason)
OutputResourcePtr CreateOutputResourceWithPath(const StringPiece &path, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
Definition: rewrite_driver.h:574
Definition: output_resource.h:44
void set_critical_css_result(CriticalCssResult *critical_css_rules)
CacheUrlAsyncFetcher * CreateCacheFetcher()
void set_refs_before_base()
Definition: rewrite_driver.h:668
RewriteContext * RegisterForPartitionKey(const GoogleString &partition_key, RewriteContext *candidate)
Used internally. Do not pass in.
Definition: rewrite_driver.h:109
void ExecuteFlushIfRequestedAsync(Function *callback)
AbstractLogRecord * log_record()
Definition: domain_rewrite_filter.h:40
void SetServerContext(ServerContext *server_context)
const RewriteOptions * options() const
Return the options used for this RewriteDriver.
Definition: rewrite_driver.h:478
void UpdatePropertyValueInDomCohort(AbstractPropertyPage *page, StringPiece property_name, StringPiece property_value)
Definition: rewrite_options.h:83
void set_device_type(UserAgentMatcher::DeviceType x)
Sets the device type chosen for the current property_page.
Definition: rewrite_driver.h:816
Definition: rewrite_driver.h:1728
Counts some basic statistics observed as HTML is parsed.
Definition: dom_stats_filter.h:34
static const char kParseSizeLimitExceeded[]
Tracks if we exceeded the maximum size limit of html which we should parse.
Definition: rewrite_driver.h:157
bool DecodeUrl(const GoogleUrl &url, StringVector *decoded_urls) const
Decodes the incoming pagespeed url to original url(s).
Definition: request_properties.h:39
void InsertDebugComment(StringPiece unescaped_message, HtmlNode *node)
CriticalImagesInfo * critical_images_info() const
Used by ImageRewriteFilter for identifying critical images.
Definition: rewrite_driver.h:969
void increment_num_flushed_early_pagespeed_resources()
Definition: rewrite_driver.h:1025
IntendedFor
See CreateInputResource.
Definition: rewrite_driver.h:141
Definition: url_async_fetcher.h:33
Definition: resource_namer.h:32
void set_distributed_fetcher(UrlAsyncFetcher *fetcher)
Does not take ownership.
Definition: rewrite_driver.h:449
Definition: critical_images_finder.h:52
void RequestFlush()
Definition: rewrite_driver.h:836
OutputResourceKind
Definition: output_resource_kind.h:26