Page Speed Optimization Libraries  1.2.24.1
net/instaweb/util/public/url_escaper.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2010 Google Inc.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00018 
00019 #ifndef NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_
00020 #define NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_
00021 
00022 #include "net/instaweb/util/public/string.h"
00023 #include "net/instaweb/util/public/string_util.h"
00024 
00025 namespace net_instaweb {
00026 
00027 /*
00028 common url format
00029 
00030   http://www.foo.bar/z1234/b_c.d?e=f&g=h
00031 
00032 this suggests we should have short encodings for  a-zA-Z0-9.:/?&
00033 
00034 We would like the above URL to be reasonably legible if possible.
00035 However it's also nice if it's short.
00036 
00037 One annoyance is that we are using '.' to delimit the 4 fields of an
00038 instaweb-generated URL.  That can probably be changed to use ^, which
00039 is a legal URL but is rarely used in URLs.  This would enable us to
00040 leave . alone.  But that's probably a moderately painful change
00041 involving a fair amount of regolding.
00042 
00043 In the meantime we can replace . with ^ in this encoder so that they
00044 don't change size.  So the transform table is:
00045 
00046 a-zA-Z0-9_=+-&? unchanged
00047 http://         ,h
00048 %               ,P
00049 /               ,_
00050 \               ,-
00051 ,               ,
00052 ?               ,q
00053 &               ,a
00054 
00055 everything else ,XX  where XX are hex digits using capital letters
00056 
00057 
00058 The intent of this class is to help encode arbitrary URLs (really, any
00059 stream of 8-bit characters, but optimized for URLs) so that it can be
00060 used in one 'segment' of a new URL.  This means we will not output
00061 . or / but will instead escape those.
00062 */
00063 
00064 namespace UrlEscaper {  // Free functions for escaping/unescaping text for use in one URL segment.
00065 
00066 void EncodeToUrlSegment(const StringPiece& in, GoogleString* url_segment);  // Encodes 'in' into *url_segment; presumably follows the transform table above -- confirm against the .cc.
00067 bool DecodeFromUrlSegment(const StringPiece& url_segment, GoogleString* out);  // Decodes url_segment into *out; the bool presumably reports whether decoding succeeded -- TODO confirm.
00068 
00069 }  // namespace UrlEscaper
00070 
00071 }  // namespace net_instaweb
00072 
00073 #endif  // NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines