zlib库解压http报文中的gzip数据

  • A+
所属分类:Web开发 技术展台

抓HTTP包的时候发现很多网站的响应报文是gzip压缩后的数据,存放在一个或多个chunk里面(参见HTTP响应报文中的chunked)。这些gzip数据是不可阅读的,需要进行解压。一开始在网上找到了一份可以正常运行的代码,贴出来:

http://hi.baidu.com/xzq2000/blog/item/c5429f2fd6a646301f308991.html/cmtid/332e72f08f0b53a2a40f5237

 char* ungzip(char* source,int len) { int err; z_stream d_stream; Byte compr[segment_size]={0}, uncompr[segment_size*4]={0}; memcpy(compr,(Byte*)source,len); uLong comprLen, uncomprLen; comprLen = sizeof(compr) / sizeof(compr[0]); uncomprLen = 4*comprLen; strcpy((char*)uncompr, "garbage"); d_stream.zalloc = (alloc_func)0; d_stream.zfree = (free_func)0; d_stream.opaque = (voidpf)0; d_stream.next_in = compr; d_stream.avail_in = 0; d_stream.next_out = uncompr; err = inflateInit2(&d_stream,47); if(err!=Z_OK) { printf("inflateInit2 error:%d",err); return NULL; } while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { d_stream.avail_in = d_stream.avail_out = 1; err = inflate(&d_stream,Z_NO_FLUSH); if(err == Z_STREAM_END) break; if(err!=Z_OK) { printf("inflate error:%d",err); return NULL; } } err = inflateEnd(&d_stream); if(err!=Z_OK) { printf("inflateEnd error:%d",err); return NULL; } char* b = new char[d_stream.total_out+1]; memset(b,0,d_stream.total_out+1); memcpy(b,(char*)uncompr,d_stream.total_out); return b; }

后来看了zlib usage example(参见zlib使用范例),模仿写了一段代码,可以正常运行,而且感觉比上面的代码要快,因为上面的代码把z_stream的avail_in和avail_out都设为1了,只能一字节一字节地解压,非常慢。

 #include  #include  #include  #include  #define segment_size 1460//largest tcp data segment int ungzip(char* source,int len,char*des) { int ret,have; int offset=0; z_stream d_stream; Byte compr[segment_size]={0}, uncompr[segment_size*4]={0}; memcpy(compr,(Byte*)source,len); uLong comprLen, uncomprLen; comprLen =len;//一开始写成了comprlen=sizeof(compr)以及comprlen=strlen(compr),后来发现都不对。 //sizeof(compr)永远都是segment_size,显然不对,strlen(compr)也是不对的,因为strlen只算到�之前, //但是gzip或者zlib数据里�很多。 uncomprLen = segment_size*4; strcpy((char*)uncompr, "garbage"); d_stream.zalloc = Z_NULL; d_stream.zfree = Z_NULL; d_stream.opaque = Z_NULL; d_stream.next_in = Z_NULL;//inflateInit和inflateInit2都必须初始化next_in和avail_in d_stream.avail_in = 0;//deflateInit和deflateInit2则不用 ret = inflateInit2(&d_stream,47); if(ret!=Z_OK) { printf("inflateInit2 error:%d",ret); return ret; } d_stream.next_in=compr; d_stream.avail_in=comprLen; do { d_stream.next_out=uncompr; d_stream.avail_out=uncomprLen; ret = inflate(&d_stream,Z_NO_FLUSH); assert(ret != Z_STREAM_ERROR); switch (ret) { case Z_NEED_DICT: ret = Z_DATA_ERROR; case Z_DATA_ERROR: case Z_MEM_ERROR: (void)inflateEnd(&d_stream); return ret; } have=uncomprLen-d_stream.avail_out; memcpy(des+offset,uncompr,have);//这里一开始我写成了memcpy(des+offset,d_stream.next_out,have); //后来发现这是不对的,因为next_out指向的下次的输出,现在指向的是无有意义数据的内存。见下图 offset+=have; }while(d_stream.avail_out==0); inflateEnd(&d_stream); memcpy(des+offset,"�",1); return ret; } 

下载服务恢复

发表评论

:?::razz::sad::evil::!::smile::oops::grin::eek::shock::???::cool::lol::mad::twisted::roll::wink::idea::arrow::neutral::cry::mrgreen:

Captcha Code