Page Speed Optimization Libraries  1.8.31.2
net/instaweb/rewriter/public/rewrite_driver.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Google Inc.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00018 
00019 #ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
00020 #define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
00021 
00022 #include <map>
00023 #include <set>
00024 #include <vector>
00025 
00026 #include "base/logging.h"
00027 #include "net/instaweb/htmlparse/public/html_element.h"
00028 #include "net/instaweb/htmlparse/public/html_parse.h"
00029 #include "net/instaweb/http/public/cache_url_async_fetcher.h"
00030 #include "net/instaweb/http/public/http_cache.h"
00031 #include "net/instaweb/http/public/request_context.h"
00032 #include "net/instaweb/http/public/user_agent_matcher.h"
00033 #include "net/instaweb/rewriter/public/critical_images_finder.h"
00034 #include "net/instaweb/rewriter/public/critical_selector_finder.h"
00035 #include "net/instaweb/rewriter/public/downstream_cache_purger.h"
00036 #include "net/instaweb/rewriter/public/output_resource_kind.h"
00037 #include "net/instaweb/rewriter/public/resource.h"
00038 #include "net/instaweb/rewriter/public/resource_slot.h"
00039 #include "net/instaweb/rewriter/public/rewrite_context.h"
00040 #include "net/instaweb/rewriter/public/rewrite_options.h"
00041 #include "net/instaweb/rewriter/public/scan_filter.h"
00042 #include "net/instaweb/rewriter/public/server_context.h"
00043 #include "net/instaweb/util/public/basictypes.h"
00044 #include "net/instaweb/util/public/google_url.h"
00045 #include "net/instaweb/util/public/printf_format.h"
00046 #include "net/instaweb/util/public/queued_worker_pool.h"
00047 #include "net/instaweb/util/public/scheduler.h"
00048 #include "net/instaweb/util/public/scoped_ptr.h"
00049 #include "net/instaweb/util/public/string.h"
00050 #include "net/instaweb/util/public/string_util.h"
00051 #include "net/instaweb/util/public/thread_system.h"
00052 #include "net/instaweb/util/public/url_segment_encoder.h"
00053 #include "pagespeed/kernel/base/abstract_mutex.h"
00054 #include "pagespeed/kernel/base/thread_annotations.h"
00055 #include "pagespeed/kernel/http/content_type.h"
00056 #include "pagespeed/kernel/http/response_headers.h"
00057 #include "pagespeed/kernel/util/categorized_refcount.h"
00058 
00059 namespace net_instaweb {
00060 
00061 class AbstractLogRecord;
00062 class AbstractMutex;
00063 class AbstractPropertyPage;
00064 class AsyncFetch;
00065 class CriticalCssResult;
00066 class CriticalKeys;
00067 class CriticalLineInfo;
00068 class DebugFilter;
00069 class DomStatsFilter;
00070 class DomainRewriteFilter;
00071 class FallbackPropertyPage;
00072 class FileSystem;
00073 class FlushEarlyInfo;
00074 class FlushEarlyRenderInfo;
00075 class Function;
00076 class HtmlFilter;
00077 class HtmlWriterFilter;
00078 class MessageHandler;
00079 class OutputResource;
00080 class PropertyPage;
00081 class RequestHeaders;
00082 class RequestProperties;
00083 class RequestTrace;
00084 class ResourceContext;
00085 class ResourceNamer;
00086 class RewriteDriverPool;
00087 class RewriteFilter;
00088 class SplitHtmlConfig;
00089 class Statistics;
00090 class UrlAsyncFetcher;
00091 class UrlLeftTrimFilter;
00092 class UrlNamer;
00093 class Writer;
00094 
00097 class RewriteDriver : public HtmlParse {
00098  public:
00100   enum CssResolutionStatus {
00101     kWriteFailed,
00102     kNoResolutionNeeded,
00103     kSuccess
00104   };
00105 
00107   enum WaitMode {
00108     kNoWait,  
00109     kWaitForCompletion,    
00110     kWaitForCachedRender,  
00111 
00112     kWaitForShutDown       
00113 
00114   };
00115 
00127   enum XhtmlStatus {
00128     kXhtmlUnknown,
00129     kIsXhtml,
00130     kIsNotXhtml
00131   };
00132 
00134   enum InlineAuthorizationPolicy {
00135     kInlineUnauthorizedResources,
00136     kInlineOnlyAuthorizedResources
00137   };
00138 
00140   enum IntendedFor {
00141     kIntendedForInlining,
00142     kIntendedForGeneral
00143   };
00144 
00148   static const char kDomCohort[];
00150   static const char kBeaconCohort[];
00151 
00154   static const char kLastRequestTimestamp[];
00156   static const char kParseSizeLimitExceeded[];
00158   static const char kSubresourcesPropertyName[];
00160   static const char kStatusCodePropertyName[];
00161 
00162   RewriteDriver(MessageHandler* message_handler,
00163                 FileSystem* file_system,
00164                 UrlAsyncFetcher* url_async_fetcher);
00165 
00168   virtual ~RewriteDriver();
00169 
00172   RewriteDriver* Clone();
00173 
00179   void Clear();
00180 
00182   static void InitStats(Statistics* statistics);
00183 
00185   static void Initialize();
00186   static void Terminate();
00187 
00190   void SetServerContext(ServerContext* server_context);
00191 
00194   bool MayCacheExtendCss() const;
00195   bool MayCacheExtendImages() const;
00196   bool MayCacheExtendPdfs() const;
00197   bool MayCacheExtendScripts() const;
00198 
00199   const GoogleString& user_agent() const {
          // Read-only access to the stored user-agent string (user_agent_).
          return user_agent_;
        }
00200 
00201   void SetUserAgent(const StringPiece& user_agent_string);
00202 
00203   const RequestProperties* request_properties() const {
          // Exposes the raw pointer held by the request_properties_ scoped_ptr
          // as a pointer-to-const; the member itself is left untouched.
          const RequestProperties* properties = request_properties_.get();
          return properties;
00205   }
00206 
00208   void ClearRequestProperties();
00209 
00211   bool using_spdy() const {
          // Forwards the question to the request context. request_context_ is
          // dereferenced here without a null check.
          return request_context_->using_spdy();
        }
00212 
00213   bool write_property_cache_dom_cohort() const {
          // Plain read of the write_property_cache_dom_cohort_ flag.
          const bool enabled = write_property_cache_dom_cohort_;
          return enabled;
00215   }
00216   void set_write_property_cache_dom_cohort(bool x) {
          // Stores the caller-supplied value as-is.
00217     write_property_cache_dom_cohort_ = x;
00218   }
00219 
00220   RequestContextPtr request_context() const {
          // Returns a copy of the request-context handle stored in
          // request_context_. Const-qualified so that it can be called through a
          // const RewriteDriver, like the other read-only accessors of this class
          // (e.g. server_context(), options()).
          return request_context_;
        }
00221   void set_request_context(const RequestContextPtr& x);
00222 
00225   RequestTrace* trace_context();
00226 
00229   void TracePrintf(const char* fmt, ...);
00230 
00233   ResponseHeaders* mutable_response_headers() {
          // Once flush_occurred_ is set the headers are no longer handed out
          // for modification: callers receive NULL instead.
          if (flush_occurred_) {
            return NULL;
          }
          return response_headers_;
00235   }
00236 
00243   const ResponseHeaders* response_headers() const {
          // Read-only view of response_headers_. Unlike
          // mutable_response_headers(), the pointer is returned regardless of
          // flush_occurred_. The accessor is const-qualified so it is usable
          // through a const RewriteDriver.
00244     return response_headers_;
00245   }
00246 
00250   void set_response_headers_ptr(ResponseHeaders* headers) {
          // Only the pointer is stored; the headers object is not copied here.
00251     response_headers_ = headers;
00252   }
00253 
00259   void SetRequestHeaders(const RequestHeaders& headers);
00260 
00261   const RequestHeaders* request_headers() const {
          // request_headers_ is a scoped_ptr<const RequestHeaders>; hand out the
          // raw pointer it currently holds.
          const RequestHeaders* headers = request_headers_.get();
          return headers;
00263   }
00264 
00265   UserAgentMatcher* user_agent_matcher() const {
          // The matcher lives on the server context, so a server context must
          // have been set before this is called (checked in debug builds).
00266     DCHECK(server_context() != NULL);
          ServerContext* context = server_context();
          return context->user_agent_matcher();
00268   }
00269 
00274   void AddFilters();
00275 
00279   void AddOwnedEarlyPreRenderFilter(HtmlFilter* filter);
00280 
00282   void PrependOwnedPreRenderFilter(HtmlFilter* filter);
00284   void AppendOwnedPreRenderFilter(HtmlFilter* filter);
00285 
00287   void AddOwnedPostRenderFilter(HtmlFilter* filter);
00289   void AddUnownedPostRenderFilter(HtmlFilter* filter);
00290 
00302   void AppendRewriteFilter(RewriteFilter* filter);
00303 
00306   void PrependRewriteFilter(RewriteFilter* filter);
00307 
00315   void AddResourceUrlClaimant(ResourceUrlClaimant* claimant);
00316 
00322   void SetWriter(Writer* writer);
00323 
00324   Writer* writer() const {
          // Returns the writer_ pointer as currently stored.
          return writer_;
        }
00325 
00354   bool FetchResource(const StringPiece& url, AsyncFetch* fetch);
00355 
00373   void FetchInPlaceResource(const GoogleUrl& gurl, bool proxy_mode,
00374                             AsyncFetch* async_fetch);
00375 
00382   bool FetchOutputResource(const OutputResourcePtr& output_resource,
00383                            RewriteFilter* filter,
00384                            AsyncFetch* async_fetch);
00385 
00392   OutputResourcePtr DecodeOutputResource(const GoogleUrl& url,
00393                                          RewriteFilter** filter) const;
00394 
00400   bool DecodeOutputResourceName(const GoogleUrl& url,
00401                                 const RewriteOptions* options_to_use,
00402                                 const UrlNamer* url_namer,
00403                                 ResourceNamer* name_out,
00404                                 OutputResourceKind* kind_out,
00405                                 RewriteFilter** filter_out) const;
00406 
00418   bool LookupMetadataForOutputResource(
00419       StringPiece url,
00420       GoogleString* error_out,
00421       RewriteContext::CacheLookupResultCallback* callback);
00422 
00424   bool DecodeUrl(const GoogleUrl& url,
00425                  StringVector* decoded_urls) const;
00426 
00429   bool DecodeUrlGivenOptions(const GoogleUrl& url,
00430                              const RewriteOptions* options,
00431                              const UrlNamer* url_namer,
00432                              StringVector* decoded_urls) const;
00433 
00434   FileSystem* file_system() const {
          // Returns the file_system_ pointer. Const-qualified for consistency
          // with server_context(), which is also a const pointer accessor.
          return file_system_;
        }
00435   UrlAsyncFetcher* async_fetcher() const {
          // Returns url_async_fetcher_ (not default_url_async_fetcher_).
          return url_async_fetcher_;
        }
00436 
00441   void SetSessionFetcher(UrlAsyncFetcher* f);
00442 
00443   UrlAsyncFetcher* distributed_fetcher() const {
          // Returns the distributed_async_fetcher_ pointer. Const-qualified so
          // it can be read through a const RewriteDriver.
          return distributed_async_fetcher_;
        }
00445   void set_distributed_fetcher(UrlAsyncFetcher* fetcher) {
          // Only the pointer is recorded here.
00446     distributed_async_fetcher_ = fetcher;
00447   }
00448 
00451   CacheUrlAsyncFetcher* CreateCacheFetcher();
00453   CacheUrlAsyncFetcher* CreateCacheOnlyFetcher();
00454 
00455   ServerContext* server_context() const {
          // Returns the server_context_ pointer as stored; user_agent_matcher()
          // DCHECKs that it is non-NULL before using it.
          return server_context_;
        }
00456   Statistics* statistics() const;
00457 
00459   void set_custom_options(RewriteOptions* options) {
          // Custom options are installed without an associated driver pool,
          // hence the NULL pool argument.
          RewriteDriverPool* const no_pool = NULL;
          set_options_for_pool(no_pool, options);
00461   }
00462 
00465   void set_options_for_pool(RewriteDriverPool* pool, RewriteOptions* options) {
          // Records the pool pointer, then hands `options` to the options_
          // scoped_ptr via reset(), which replaces whatever options_ held before.
00466     controlling_pool_ = pool;
00467     options_.reset(options);
00468   }
00469 
00471   RewriteDriverPool* controlling_pool() const {
          // Returns the pool pointer recorded by set_options_for_pool(); it is
          // NULL when the options were installed via set_custom_options().
          // Const-qualified so it can be read through a const RewriteDriver.
          return controlling_pool_;
        }
00472 
00474   const RewriteOptions* options() const {
          // Read-only view of the options object held by the options_ scoped_ptr.
          return options_.get();
        }
00475 
00479   virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
00480                             const ContentType& content_type);
00481 
00488   virtual void FinishParse();
00489 
00493   void FinishParseAsync(Function* callback);
00494 
00498   void InfoAt(const RewriteContext* context,
00499               const char* msg, ...) INSTAWEB_PRINTF_FORMAT(3, 4);
00500 
00505 
00512   OutputResourcePtr CreateOutputResourceFromResource(
00513       const StringPiece& filter_id,
00514       const UrlSegmentEncoder* encoder,
00515       const ResourceContext* data,
00516       const ResourcePtr& input_resource,
00517       OutputResourceKind kind);
00518 
00535   OutputResourcePtr CreateOutputResourceWithPath(
00536       const StringPiece& mapped_path, const StringPiece& unmapped_path,
00537       const StringPiece& base_url, const StringPiece& filter_id,
00538       const StringPiece& name, OutputResourceKind kind);
00539 
00542   void PopulateResourceNamer(
00543     const StringPiece& filter_id,
00544     const StringPiece& name,
00545     ResourceNamer* full_name);
00546 
00550   OutputResourcePtr CreateOutputResourceWithUnmappedUrl(
00551       const GoogleUrl& unmapped_gurl, const StringPiece& filter_id,
00552       const StringPiece& name, OutputResourceKind kind);
00553 
00556   OutputResourcePtr CreateOutputResourceWithMappedPath(
00557       const StringPiece& mapped_path, const StringPiece& unmapped_path,
00558       const StringPiece& filter_id, const StringPiece& name,
00559       OutputResourceKind kind) {
          // Convenience wrapper around the six-argument
          // CreateOutputResourceWithPath(): the base URL argument is filled in
          // from decoded_base_url_.AllExceptLeaf(), everything else is forwarded
          // unchanged.
          return CreateOutputResourceWithPath(
              mapped_path,
              unmapped_path,
              /* base_url = */ decoded_base_url_.AllExceptLeaf(),
              filter_id,
              name,
              kind);
00563   }
00564 
00567   OutputResourcePtr CreateOutputResourceWithPath(
00568       const StringPiece& path, const StringPiece& filter_id,
00569       const StringPiece& name, OutputResourceKind kind) {
          // Four-argument shorthand: the single `path` is used as the mapped
          // path, the unmapped path and the base URL of the six-argument
          // overload.
          return CreateOutputResourceWithPath(
              /* mapped_path = */ path,
              /* unmapped_path = */ path,
              /* base_url = */ path,
              filter_id,
              name,
              kind);
00572   }
00573 
00578   ResourcePtr CreateInputResource(const GoogleUrl& input_url);
00579 
00593   ResourcePtr CreateInputResource(
00594       const GoogleUrl& input_url,
00595       InlineAuthorizationPolicy inline_authorization_policy,
00596       IntendedFor intended_for);
00597 
00601   ResourcePtr CreateInputResourceAbsoluteUnchecked(
00602       const StringPiece& absolute_url);
00603 
00607   bool IsResourceUrlClaimed(const GoogleUrl& url) const;
00608 
00613   bool MatchesBaseUrl(const GoogleUrl& input_url) const;
00614 
00620   bool MayRewriteUrl(const GoogleUrl& domain_url,
00621                      const GoogleUrl& input_url,
00622                      InlineAuthorizationPolicy inline_authorization_policy,
00623                      IntendedFor intended_for,
00624                      bool* is_authorized_domain) const;
00625 
00629   const GoogleUrl& base_url() const {
          // Reference to the base_url_ member.
          return base_url_;
        }
00630 
00632   StringPiece fetch_url() const {
          // View over the fetch_url_ string; valid only while that member is
          // unchanged.
          return fetch_url_;
        }
00633 
00637   const GoogleUrl& decoded_base_url() const {
          // Reference to the decoded_base_url_ member.
          return decoded_base_url_;
        }
00638   StringPiece decoded_base() const {
          // Same URL as decoded_base_url(), returned as its Spec().
          return decoded_base_url_.Spec();
        }
00639 
00641   bool IsHttps() const {
          // True when google_url() reports the "https" scheme.
          return google_url().SchemeIs("https");
        }
00642 
00643   const UrlSegmentEncoder* default_encoder() const {
          // Address of the encoder instance embedded in this driver.
          return &default_encoder_;
        }
00644 
00646   RewriteFilter* FindFilter(const StringPiece& id) const;
00647 
00649   bool refs_before_base() const {
          // Reads the refs_before_base_ flag. Const-qualified: it only reads
          // state.
          return refs_before_base_;
        }
00650 
00655   void set_refs_before_base() {
          // One-way switch: the flag can only be turned on through this setter.
          refs_before_base_ = true;
        }
00656 
00661   StringPiece containing_charset() const {
          // View over the containing_charset_ string; const-qualified because
          // it does not modify the driver.
          return containing_charset_;
        }
00662   void set_containing_charset(const StringPiece charset) {
          // Copies the characters into the owned containing_charset_ string, so
          // the caller's buffer need not outlive this call.
00663     charset.CopyToString(&containing_charset_);
00664   }
00665 
00667   HtmlResourceSlotPtr GetSlot(const ResourcePtr& resource,
00668                               HtmlElement* elt,
00669                               HtmlElement::Attribute* attr);
00670 
00675   bool InitiateRewrite(RewriteContext* rewrite_context)
00676       LOCKS_EXCLUDED(rewrite_mutex());
00677   void InitiateFetch(RewriteContext* rewrite_context);
00678 
00689   void RewriteComplete(RewriteContext* rewrite_context, bool permit_render);
00690 
00694   void ReportSlowRewrites(int num);
00695 
00700   void Cleanup();
00701 
00706   void AddUserReference();
00707 
00709   GoogleString ToString(bool show_detached_contexts);
00710   void PrintState(bool show_detached_contexts); 
00711   void PrintStateToErrorLog(bool show_detached_contexts); 
00712 
00715   void WaitForCompletion();
00716 
00723   void WaitForShutDown();
00724 
00728   void BoundedWaitFor(WaitMode mode, int64 timeout_ms)
00729       LOCKS_EXCLUDED(rewrite_mutex());
00730 
00738   void set_fully_rewrite_on_flush(bool x) { fully_rewrite_on_flush_ = x; }
00741 
          // Both flags below are consulted together by
          // WaitForPendingAsyncEvents().
00743   bool fully_rewrite_on_flush() const { return fully_rewrite_on_flush_; }
00746 
00750   void set_fast_blocking_rewrite(bool x) { fast_blocking_rewrite_ = x; }
00753 
00754   bool fast_blocking_rewrite() const { return fast_blocking_rewrite_; }
00757 
00760   void EnableBlockingRewrite(RequestHeaders* request_headers);
00761 
00768   void set_externally_managed(bool x) { externally_managed_ = x; }  // Stores the flag only.
00769 
00773   void DetachFetch();
00774 
00777   void DetachedFetchComplete();
00778 
00782   void FetchComplete();
00783 
00789   void DeleteRewriteContext(RewriteContext* rewrite_context);
00790 
00791   int rewrite_deadline_ms() const {
          // The deadline is not stored on the driver; it is read from the
          // current options each time. options() must therefore be non-NULL.
          return options()->rewrite_deadline_ms();
        }
00792 
00797   void set_max_page_processing_delay_ms(int x) {
00798     max_page_processing_delay_ms_ = x;
00799   }
00800   int max_page_processing_delay_ms() const {
          // Read-only accessor, const-qualified to match device_type() below.
          return max_page_processing_delay_ms_;
        }
00801 
00803   void set_device_type(UserAgentMatcher::DeviceType x) { device_type_ = x; }
00804   UserAgentMatcher::DeviceType device_type() const { return device_type_; }
00805 
00811   RewriteContext* RegisterForPartitionKey(const GoogleString& partition_key,
00812                                           RewriteContext* candidate);
00813 
00818   void DeregisterForPartitionKey(
00819       const GoogleString& partition_key, RewriteContext* candidate);
00820 
00823   void RequestFlush() {
          // Only records the request by setting flush_requested_; no flush is
          // performed here.
          flush_requested_ = true;
        }
00824   bool flush_requested() const {
          return flush_requested_;
        }
00825 
00837   void ExecuteFlushIfRequested();
00838 
00842   void ExecuteFlushIfRequestedAsync(Function* callback);
00843 
00852   virtual void Flush();
00853 
00857   void FlushAsync(Function* done);
00858 
00860   void AddRewriteTask(Function* task);
00861 
00864   void AddLowPriorityRewriteTask(Function* task);
00865 
00866   QueuedWorkerPool::Sequence* html_worker() const { return html_worker_; }
          // The accessors in this group only read pointer members, so they are
          // const-qualified (matching server_context() and dom_stats_filter()).
00867   QueuedWorkerPool::Sequence* rewrite_worker() const {
          return rewrite_worker_;
        }
00868   QueuedWorkerPool::Sequence* low_priority_rewrite_worker() const {
00869     return low_priority_rewrite_worker_;
00870   }
00871 
00872   Scheduler* scheduler() const { return scheduler_; }
00873 
          // domain_rewriter_ and url_trim_filter_ are scoped_ptr members; these
          // return the raw pointers they hold.
00876   DomainRewriteFilter* domain_rewriter() const {
          return domain_rewriter_.get();
        }
00877   UrlLeftTrimFilter* url_trim_filter() const {
          return url_trim_filter_.get();
        }
00878 
00886   CssResolutionStatus ResolveCssUrls(const GoogleUrl& input_css_base,
00887                                      const StringPiece& output_css_base,
00888                                      const StringPiece& contents,
00889                                      Writer* writer,
00890                                      MessageHandler* handler);
00891 
00899   bool ShouldAbsolutifyUrl(const GoogleUrl& input_base,
00900                            const GoogleUrl& output_base,
00901                            bool* proxy_mode) const;
00902 
00910   void UpdatePropertyValueInDomCohort(
00911       AbstractPropertyPage* page,
00912       StringPiece property_name,
00913       StringPiece property_value);
00914 
00917   PropertyPage* property_page() const;
00922   FallbackPropertyPage* fallback_property_page() const {
          // Returns the fallback_property_page_ pointer as stored.
          FallbackPropertyPage* page = fallback_property_page_;
          return page;
00924   }
00926   void set_property_page(PropertyPage* page);
00928   void set_fallback_property_page(FallbackPropertyPage* page);
00930   void set_unowned_fallback_property_page(FallbackPropertyPage* page);
00931 
00933   const CriticalLineInfo* critical_line_info() const;
00934 
00937   void set_critical_line_info(CriticalLineInfo* critical_line_info);
00938 
00939   CriticalKeys* beacon_critical_line_info() const;
00940   void set_beacon_critical_line_info(CriticalKeys* beacon_critical_line_info);
00941 
00942   const SplitHtmlConfig* split_html_config();
00943 
00944   CriticalCssResult* critical_css_result() const;
00947   void set_critical_css_result(CriticalCssResult* critical_css_rules);
00948 
00950   CriticalImagesInfo* critical_images_info() const {
          // Raw pointer held by the critical_images_info_ scoped_ptr.
00951     return critical_images_info_.get();
00952   }
00953 
00958   CriticalSelectorInfo* critical_selector_info() const {
          // Raw pointer held by the critical_selector_info_ scoped_ptr.
          // Const-qualified to match critical_images_info() above.
00959     return critical_selector_info_.get();
00960   }
00961 
00965   void set_critical_selector_info(CriticalSelectorInfo* info) {
          // reset() hands `info` to the scoped_ptr, replacing what it held.
00966     critical_selector_info_.reset(info);
00967   }
00968 
00973   void set_critical_images_info(CriticalImagesInfo* critical_images_info) {
          // reset() hands the pointer to the scoped_ptr, replacing what it held.
00974     critical_images_info_.reset(critical_images_info);
00975   }
00976 
00979   bool CriticalSelectorsEnabled() const;
00980 
00983   bool FlattenCssImportsEnabled() const {
          // Explicitly enabled: nothing else to check.
          if (options()->Enabled(RewriteOptions::kFlattenCssImports)) {
            return true;
          }
          // Explicitly forbidden: the implicit triggers below do not apply.
          if (options()->Forbidden(RewriteOptions::kFlattenCssImports)) {
            return false;
          }
          // Otherwise flattening is implied by critical selectors being enabled
          // or by the kComputeCriticalCss filter.
          return CriticalSelectorsEnabled() ||
              options()->Enabled(RewriteOptions::kComputeCriticalCss);
00988   }
00989 
00993   int num_inline_preview_images() const {
          // Current value of the num_inline_preview_images_ counter.
          return num_inline_preview_images_;
        }
00994 
00996   void increment_num_inline_preview_images();
00997 
01000   int num_flushed_early_pagespeed_resources() const {
          // Current value of the counter.
          const int count = num_flushed_early_pagespeed_resources_;
          return count;
01002   }
01003 
01006   void increment_num_flushed_early_pagespeed_resources() {
          // Bumps the counter by one; no locking is done here.
          num_flushed_early_pagespeed_resources_ += 1;
01008   }
01009 
01012   void increment_async_events_count();
01013 
01015   void decrement_async_events_count();
01016 
01019   XhtmlStatus MimeTypeXhtmlStatus();
01020 
01021   void set_flushed_cached_html(bool x) { flushed_cached_html_ = x; }
          // Plain flag accessors. Every getter in this group is const-qualified
          // since it only reads a bool member.
01022   bool flushed_cached_html() const { return flushed_cached_html_; }
01023 
01024   void set_flushing_cached_html(bool x) { flushing_cached_html_ = x; }
01025   bool flushing_cached_html() const { return flushing_cached_html_; }
01026 
01027   void set_flushed_early(bool x) { flushed_early_ = x; }
01028   bool flushed_early() const { return flushed_early_; }
01029 
01030   void set_flushing_early(bool x) { flushing_early_ = x; }
01031   bool flushing_early() const { return flushing_early_; }
01032 
01033   void set_is_lazyload_script_flushed(bool x) {
01034     is_lazyload_script_flushed_ = x;
01035   }
01036   bool is_lazyload_script_flushed() const {
01037     return is_lazyload_script_flushed_;
        }
01038 
01040   FlushEarlyInfo* flush_early_info();
01041 
01042   FlushEarlyRenderInfo* flush_early_render_info() const;
01043 
01046   void set_flush_early_render_info(
01047       FlushEarlyRenderInfo* flush_early_render_info);
01048 
01051   bool DebugMode() const {
          // Debug mode is simply "the kDebug filter is enabled in the options".
          return options()->Enabled(RewriteOptions::kDebug);
        }
01052 
01055   void SaveOriginalHeaders(const ResponseHeaders& response_headers);
01056 
01059   AbstractLogRecord* log_record();
01060 
01061   DomStatsFilter* dom_stats_filter() const { return dom_stats_filter_; }
01064 
01067   bool can_rewrite_resources() const {
          // Reads the can_rewrite_resources_ flag.
          return can_rewrite_resources_;
        }
01068 
01070   bool is_nested() const {
          // Reads the is_nested_ flag.
          return is_nested_;
        }
01071 
01075   bool MetadataRequested(const RequestHeaders& request_headers) const;
01076 
01078   bool tried_to_distribute_fetch() const {
          // Reads the tried_to_distribute_fetch_ flag.
          return tried_to_distribute_fetch_;
        }
01079 
01092   bool Write(const ResourceVector& inputs,
01093              const StringPiece& contents,
01094              const ContentType* type,
01095              StringPiece charset,
01096              OutputResource* output);
01097 
01098   void set_defer_instrumentation_script(bool x) {
          // Stores the caller-supplied flag value.
01099     defer_instrumentation_script_ = x;
01100   }
01101   bool defer_instrumentation_script() const {
          const bool deferred = defer_instrumentation_script_;
          return deferred;
01103   }
01104 
01107   void set_num_initiated_rewrites(int64 x) {
          // The counter is GUARDED_BY(rewrite_mutex()), so every access in this
          // group takes that mutex for the duration of the call.
01108     ScopedMutex lock(rewrite_mutex());
01109     num_initiated_rewrites_ = x;
01110   }
01111   int64 num_initiated_rewrites() const {
01112     ScopedMutex lock(rewrite_mutex());
          const int64 initiated = num_initiated_rewrites_;
          return initiated;
01114   }
01116   void set_num_detached_rewrites(int64 x) {
01117     ScopedMutex lock(rewrite_mutex());
01118     num_detached_rewrites_ = x;
01119   }
01120   int64 num_detached_rewrites() const {
01121     ScopedMutex lock(rewrite_mutex());
          const int64 detached = num_detached_rewrites_;
          return detached;
01123   }
01124 
01125   void set_pagespeed_query_params(StringPiece x) {
          // Copies the characters into the owned string member, so the caller's
          // buffer need not outlive this call.
01126     x.CopyToString(&pagespeed_query_params_);
01127   }
01128   StringPiece pagespeed_query_params() const {
          // View over the owned string; valid until the member is next changed.
          return StringPiece(pagespeed_query_params_);
01130   }
01131 
01134   const GoogleString& CacheFragment() const;
01135 
01136  protected:
01137   virtual void DetermineEnabledFiltersImpl();
01138 
01139  private:
01140   friend class DistributedRewriteContextTest;
01141   friend class RewriteContext;
01142   friend class RewriteDriverTest;
01143   friend class RewriteTestBase;
01144   friend class ServerContextTest;
01145 
01146   typedef std::map<GoogleString, RewriteFilter*> StringFilterMap;
01147 
01149   bool ShouldDistributeFetch(const StringPiece& filter_id);
01150 
01162   bool DistributeFetch(const StringPiece& url, const StringPiece& filter_id,
01163                        AsyncFetch* async_fetch);
01164 
01169   void CheckForCompletionAsync(WaitMode wait_mode, int64 timeout_ms,
01170                                Function* done)
01171       EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01172 
01176   void TryCheckForCompletion(WaitMode wait_mode, int64 end_time_ms,
01177                              Function* done)
01178       EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01179 
01181   bool IsDone(WaitMode wait_mode, bool deadline_reached)
01182       EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01183 
01186   bool WaitForPendingAsyncEvents(WaitMode wait_mode) {
          // Shutdown always waits for pending async events.
          if (wait_mode == kWaitForShutDown) {
            return true;
          }
          // Otherwise wait only when fully rewriting on flush and the fast
          // blocking-rewrite shortcut is off.
          return fully_rewrite_on_flush_ && !fast_blocking_rewrite_;
01189   }
01190 
01194   void FlushAsyncDone(int num_rewrites, Function* callback);
01195 
01200   int64 ComputeCurrentFlushWindowRewriteDelayMs();
01201 
01203   void QueueFlushAsyncDone(int num_rewrites, Function* callback);
01204 
01207   void QueueFinishParseAfterFlush(Function* user_callback);
01208   void FinishParseAfterFlush(Function* user_callback);
01209 
01210   bool RewritesComplete() const EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01211 
01214   void SetBaseUrlIfUnset(const StringPiece& new_base);
01215 
01218   void SetBaseUrlForFetch(const StringPiece& url);
01219 
01222   void SetDecodedUrlFromBase();
01223 
01225   AbstractMutex* rewrite_mutex() const LOCK_RETURNED(scheduler_->mutex()) {
          // The driver has no mutex of its own: rewrite state is guarded by the
          // scheduler's mutex, as the LOCK_RETURNED annotation declares.
          AbstractMutex* mutex = scheduler_->mutex();
          return mutex;
01227   }
01228 
01230   virtual void ParseTextInternal(const char* content, int size);
01231 
01233   bool ShouldSkipParsing();
01234 
01235   friend class ScanFilter;
01236 
01240   void RegisterRewriteFilter(RewriteFilter* filter);
01241 
01246   void EnableRewriteFilter(const char* id);
01247 
01253   ResourcePtr CreateInputResourceUnchecked(const GoogleUrl& gurl,
01254                                            bool is_authorized_domain);
01255 
01256   void AddPreRenderFilters();
01257   void AddPostRenderFilters();
01258 
01260   bool DecodeOutputResourceNameHelper(const GoogleUrl& url,
01261                                       const RewriteOptions* options_to_use,
01262                                       const UrlNamer* url_namer,
01263                                       ResourceNamer* name_out,
01264                                       OutputResourceKind* kind_out,
01265                                       RewriteFilter** filter_out,
01266                                       GoogleString* url_base,
01267                                       StringVector* urls) const;
01268 
01278   void WriteDomCohortIntoPropertyCache();
01279 
01281   CacheUrlAsyncFetcher* CreateCustomCacheFetcher(UrlAsyncFetcher* base_fetcher);
01282 
01289   void PossiblyPurgeCachedResponseAndReleaseDriver();
01290 
01292   void LogStats();
01293 
01309   bool PrepareShouldSignal() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01310   void SignalIfRequired(bool result_of_prepare_should_signal)
01311       EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
01312 
01324   bool base_was_set_;
01325 
01330   bool refs_before_base_;
01331 
01333   GoogleString containing_charset_;
01334 
01337   void PopulateRequestContext();
01338 
01339   bool filters_added_;
01340   bool externally_managed_;
01341 
01350   enum RefCategory {
01351     kRefUser,  
01352     kRefParsing,  
01353 
01357     kRefPendingRewrites,
01358 
01362     kRefDetachedRewrites,
01363 
01371     kRefDeletingRewrites,
01372 
01374     kRefFetchUserFacing,
01375 
01377     kRefFetchBackground,
01378 
01383     kRefAsyncEvents,
01384 
01385     kNumRefCategories
01386   };
01387 
01388   friend class CategorizedRefcount<RewriteDriver, RefCategory>;
01389 
01391   CategorizedRefcount<RewriteDriver, RefCategory> ref_counts_;
01392 
01394   void LastRefRemoved();
01395   StringPiece RefCategoryName(RefCategory cat);
01396 
01399   void DropReference(RefCategory cat);
01400 
01403   bool release_driver_;
01404 
01407   bool parsing_ GUARDED_BY(rewrite_mutex());
01408 
01412   WaitMode waiting_ GUARDED_BY(rewrite_mutex());
01413 
01415   bool waiting_deadline_reached_ GUARDED_BY(rewrite_mutex());
01416 
01421   bool fully_rewrite_on_flush_;
01422 
01425   bool fast_blocking_rewrite_;
01426 
01427   bool flush_requested_;
01428   bool flush_occurred_;
01429 
01431   bool flushed_cached_html_;
01432 
01434   bool flushing_cached_html_;
01435 
01438   bool flushed_early_;
01442   bool flushing_early_;
01443 
01446   bool is_lazyload_script_flushed_;
01447 
01451   bool write_property_cache_dom_cohort_;
01452 
01455   GoogleUrl base_url_;
01456 
01460   GoogleUrl decoded_base_url_;
01461 
01464   GoogleString fetch_url_;
01465 
01466   GoogleString user_agent_;
01467 
01468   LazyBool should_skip_parsing_;
01469 
01470   StringFilterMap resource_filter_map_;
01471 
01472   ResponseHeaders* response_headers_;
01473 
01476   scoped_ptr<const RequestHeaders> request_headers_;
01477 
01478   int status_code_; 
01479 
01482   typedef std::vector<RewriteContext*> RewriteContextVector;
01483   RewriteContextVector rewrites_; 
01484 
01487   int max_page_processing_delay_ms_;
01488 
01489   typedef std::set<RewriteContext*> RewriteContextSet;
01490 
01495   RewriteContextSet initiated_rewrites_ GUARDED_BY(rewrite_mutex());
01496 
01498   int64 num_initiated_rewrites_ GUARDED_BY(rewrite_mutex());
01499 
01507   int64 num_detached_rewrites_ GUARDED_BY(rewrite_mutex());
01508 
01516   RewriteContextSet detached_rewrites_ GUARDED_BY(rewrite_mutex());
01517 
01519   int possibly_quick_rewrites_ GUARDED_BY(rewrite_mutex());
01520 
01523   RewriteContextVector fetch_rewrites_;
01524 
01527   FileSystem* file_system_;
01528   ServerContext* server_context_;
01529   Scheduler* scheduler_;
01530   UrlAsyncFetcher* default_url_async_fetcher_; 
01531 
01535   UrlAsyncFetcher* url_async_fetcher_;
01536 
01540   UrlAsyncFetcher* distributed_async_fetcher_;
01541 
01544   std::vector<UrlAsyncFetcher*> owned_url_async_fetchers_;
01545 
01546   DomStatsFilter* dom_stats_filter_;
01547   scoped_ptr<HtmlWriterFilter> html_writer_filter_;
01548 
01549   ScanFilter scan_filter_;
01550   scoped_ptr<DomainRewriteFilter> domain_rewriter_;
01551   scoped_ptr<UrlLeftTrimFilter> url_trim_filter_;
01552 
01555   typedef std::map<GoogleString, RewriteContext*> PrimaryRewriteContextMap;
01556   PrimaryRewriteContextMap primary_rewrite_context_map_;
01557 
01558   HtmlResourceSlotSet slots_;
01559 
01560   scoped_ptr<RewriteOptions> options_;
01561 
01562   RewriteDriverPool* controlling_pool_; 
01563 
01565   scoped_ptr<CacheUrlAsyncFetcher::AsyncOpHooks>
01566       cache_url_async_fetcher_async_op_hooks_;
01567 
01569   UrlSegmentEncoder default_encoder_;
01570 
01572   FilterList early_pre_render_filters_;
01574   FilterList pre_render_filters_;
01575 
01577   std::vector<ResourceUrlClaimant*> resource_claimants_;
01578 
01582   FilterVector filters_to_delete_;
01583 
01584   QueuedWorkerPool::Sequence* html_worker_;
01585   QueuedWorkerPool::Sequence* rewrite_worker_;
01586   QueuedWorkerPool::Sequence* low_priority_rewrite_worker_;
01587 
01588   Writer* writer_;
01589 
01592   FallbackPropertyPage* fallback_property_page_;
01593 
01595   bool owns_property_page_;
01596 
01598   UserAgentMatcher::DeviceType device_type_;
01599 
01600   scoped_ptr<CriticalLineInfo> critical_line_info_;
01601   scoped_ptr<CriticalKeys> beacon_critical_line_info_;
01602 
01603   scoped_ptr<SplitHtmlConfig> split_html_config_;
01604 
01607   scoped_ptr<CriticalImagesInfo> critical_images_info_;
01608   scoped_ptr<CriticalSelectorInfo> critical_selector_info_;
01609 
01610   scoped_ptr<CriticalCssResult> critical_css_result_;
01611 
01613   bool xhtml_mimetype_computed_;
01614   XhtmlStatus xhtml_status_ : 8;
01615 
01618   int num_inline_preview_images_;
01619 
01621   int num_flushed_early_pagespeed_resources_;
01622 
01624   int num_bytes_in_;
01625 
01626   DebugFilter* debug_filter_;
01627 
01628   scoped_ptr<FlushEarlyInfo> flush_early_info_;
01629   scoped_ptr<FlushEarlyRenderInfo> flush_early_render_info_;
01630 
01631   bool can_rewrite_resources_;
01632   bool is_nested_;
01633 
01636   RequestContextPtr request_context_;
01637 
01639   int64 start_time_ms_;
01640 
01641   scoped_ptr<RequestProperties> request_properties_;
01642 
01646   static int initialized_count_;
01647 
01650   bool tried_to_distribute_fetch_;
01651 
01654   bool defer_instrumentation_script_;
01655 
01657   DownstreamCachePurger downstream_cache_purger_;
01658 
01660   GoogleString pagespeed_query_params_;
01661 
01662   DISALLOW_COPY_AND_ASSIGN(RewriteDriver);
01663 };
01664 
01667 class OptionsAwareHTTPCacheCallback : public HTTPCache::Callback {
        // HTTPCache::Callback subclass that carries a RewriteOptions pointer
        // (rewrite_options_). NOTE(review): all member functions are defined
        // outside this header, so how the options influence each override cannot
        // be confirmed from this listing.
01668  public:
01669   virtual ~OptionsAwareHTTPCacheCallback();
        // Instance overloads; presumably overrides of HTTPCache::Callback
        // virtuals -- confirm against http_cache.h.
01670   virtual bool IsCacheValid(const GoogleString& key,
01671                             const ResponseHeaders& headers);
01672   virtual int64 OverrideCacheTtlMs(const GoogleString& key);
01673   virtual ResponseHeaders::VaryOption RespectVaryOnResources() const;
01674 
        // Static overload of IsCacheValid that takes the options and request
        // context explicitly, so it can be called without a callback instance.
01678   static bool IsCacheValid(const GoogleString& key,
01679                            const RewriteOptions& rewrite_options,
01680                            const RequestContextPtr& request_ctx,
01681                            const ResponseHeaders& headers);
01682 
01683  protected:
        // Protected constructor: only subclasses can construct this type.
01686   OptionsAwareHTTPCacheCallback(
01687       const RewriteOptions* rewrite_options,
01688       const RequestContextPtr& request_ctx);
01689 
01690  private:
        // Raw pointer to const options; nothing in this header shows the
        // callback taking ownership of it.
01691   const RewriteOptions* rewrite_options_;
01692 
01693   DISALLOW_COPY_AND_ASSIGN(OptionsAwareHTTPCacheCallback);
01694 };
01695 
01696 }  
01697 
01698 #endif  // NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines