IT戦記

プログラミング、起業などについて書いているプログラマーのブログです😚

libxml2 内のメモリの解放

ちょっとメモ

今日も valgrind 使ってます

libxml2 を使って以下のような XML のパース処理を書く

#include <iostream>
#include <sstream>
#include <boost/shared_ptr.hpp>
#include <libxml/xmlreader.h>

/// libxml2 input callback: copies up to `len` bytes from a std::istream into
/// `buf`.
///
/// @param in   Opaque I/O context; it is cast to std::istream*.
/// @param buf  Destination buffer with room for at least `len` bytes.
/// @param len  Maximum number of bytes to copy.
/// @return Number of bytes copied, 0 at end of input (or when `len` is not
///         positive), -1 on a null argument or a failed stream.
static int read(void* in, char* buf, int len)
{
    std::istream* const stream = static_cast<std::istream*>(in);
    if (!stream || !buf)
        return -1;
    if (len <= 0)
        return 0;

    // readsome() only hands out what is already buffered; how much that is
    // is implementation-defined and may be 0 while data is still pending,
    // which the parser would take for end of input. read() + gcount() keeps
    // reading until `len` bytes were delivered or the stream is exhausted.
    stream->read(buf, len);
    if (stream->bad())
        return -1;
    return static_cast<int>(stream->gcount());
}

int main()
{
    std::istringstream in("<a><b/>hoge<c/></a>");
    boost::shared_ptr<xmlTextReader> reader(xmlReaderForIO(read, NULL, &in, NULL, NULL, 0), xmlFreeTextReader);

    while (xmlTextReaderRead(reader.get()) == 1)
    {   
        std::cout << "node depth: " << xmlTextReaderDepth(reader.get()) << std::endl;
        std::cout << "node type : " << xmlTextReaderNodeType(reader.get()) << std::endl;

        const char* name = reinterpret_cast<const char*>(xmlTextReaderConstName(reader.get()));
        std::cout << "node name : " << (name ? name : "") << std::endl;

        const char* value = reinterpret_cast<const char*>(xmlTextReaderConstValue(reader.get()));
        std::cout << "node value: " << (value ? value : "") << std::endl;
        std::cout << std::endl;
    }   
}

で、コンパイルして実行

一見ウマくいっているように見える。というかウマくいっている。

$ g++ -g -lxml2 main.cpp && ./a.out
node depth: 0
node type : 1
node name : a
node value: 

node depth: 1
node type : 1
node name : b
node value: 

node depth: 1
node type : 3
node name : #text
node value: hoge

node depth: 1
node type : 1
node name : c
node value: 

node depth: 0
node type : 15
node name : a
node value: 

でも、 valgrind してみると

$ valgrind ./a.out 
==3940== Memcheck, a memory error detector.
==3940== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al.
==3940== Using LibVEX rev 1884, a library for dynamic binary translation.
==3940== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP.
==3940== Using valgrind-3.4.1-Debian, a dynamic binary instrumentation framework.
==3940== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al.
==3940== For more details, rerun with: -v
==3940== 
node depth: 0
node type : 1
node name : a
node value: 

node depth: 1
node type : 1
node name : b
node value: 

node depth: 1
node type : 3
node name : #text
node value: hoge

node depth: 1
node type : 1
node name : c
node value: 

node depth: 0
node type : 15
node name : a
node value: 

==3940== 
==3940== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 23 from 1)
==3940== malloc/free: in use at exit: 506 bytes in 18 blocks.
==3940== malloc/free: 52 allocs, 34 frees, 22,072 bytes allocated.
==3940== For counts of detected errors, rerun with: -v
==3940== searching for pointers to 18 not-freed blocks.
==3940== checked 132,412 bytes.
==3940== 
==3940== LEAK SUMMARY:
==3940==    definitely lost: 0 bytes in 0 blocks.
==3940==      possibly lost: 0 bytes in 0 blocks.
==3940==    still reachable: 506 bytes in 18 blocks.
==3940==         suppressed: 0 bytes in 0 blocks.
==3940== Rerun with --leak-check=full to see details of leaked memory.

解放されなかったメモリがあることが分かる

ここで、メモリが解放されないパターンを2つ考えてみる

  • ライブラリが内部でメモリプールとして使っているメモリ
    • 別に解放しなくてもいい
  • 解放し忘れ
    • 解放したほうがいい

というわけで、以下のように同じコードを繰り返し走らせてメモリが増加しないかを調べる

#include <iostream>
#include <sstream>
#include <boost/shared_ptr.hpp>
#include <libxml/xmlreader.h>

/// libxml2 input callback: copies up to `len` bytes from a std::istream into
/// `buf`.
///
/// @param in   Opaque I/O context; it is cast to std::istream*.
/// @param buf  Destination buffer with room for at least `len` bytes.
/// @param len  Maximum number of bytes to copy.
/// @return Number of bytes copied, 0 at end of input (or when `len` is not
///         positive), -1 on a null argument or a failed stream.
static int read(void* in, char* buf, int len)
{
    std::istream* const stream = static_cast<std::istream*>(in);
    if (!stream || !buf)
        return -1;
    if (len <= 0)
        return 0;

    // readsome() only hands out what is already buffered; how much that is
    // is implementation-defined and may be 0 while data is still pending,
    // which the parser would take for end of input. read() + gcount() keeps
    // reading until `len` bytes were delivered or the stream is exhausted.
    stream->read(buf, len);
    if (stream->bad())
        return -1;
    return static_cast<int>(stream->gcount());
}

void sub()
{
    std::istringstream in("<a><b/>hoge<c/></a>");
    boost::shared_ptr<xmlTextReader> reader(xmlReaderForIO(read, NULL, &in, NULL, NULL, 0), xmlFreeTextReader);

    while (xmlTextReaderRead(reader.get()) == 1)
    {   
        std::cout << "node depth: " << xmlTextReaderDepth(reader.get()) << std::endl;
        std::cout << "node type : " << xmlTextReaderNodeType(reader.get()) << std::endl;

        const char* name = reinterpret_cast<const char*>(xmlTextReaderConstName(reader.get()));
        std::cout << "node name : " << (name ? name : "") << std::endl;

        const char* value = reinterpret_cast<const char*>(xmlTextReaderConstValue(reader.get()));
        std::cout << "node value: " << (value ? value : "") << std::endl;
        std::cout << std::endl;
    }   
}

// Something like this: run the identical parse three times. If the memory
// still in use at exit does not grow with the number of runs, it belongs to
// one-time library initialisation rather than to a per-call leak.
int main()
{
    sub();
    sub();
    sub(); // must be a call: a bare `sub;` is an expression with no effect
}

これでも、同じメモリしか残っていないので多分ライブラリ内のメモリプールだろうと分かる。

それでも解放できたら解放したい

他のリークに気が付かないかもしれないから。

valgrind の --leak-check=full オプション(still reachable なブロックも表示させるため --show-reachable=yes も付ける)でメモリが確保された場所を調べる

メモリの確保された箇所が分かる

$ valgrind --leak-check=full --show-reachable=yes ./a.out 

  (snip)

==3979== 
==3979== 62 bytes in 8 blocks are still reachable in loss record 1 of 4
==3979==    at 0x4026FDE: malloc (vg_replace_malloc.c:207)
==3979==    by 0x40E0C93: xmlStrndup (xmlstring.c:45)
==3979==    by 0x40E0D27: xmlStrdup (xmlstring.c:71)
==3979==    by 0x404A815: xmlNewCharEncodingHandler (encoding.c:1286)
==3979==    by 0x404A984: xmlInitCharEncodingHandlers (encoding.c:1351)
==3979==    by 0x404AC35: xmlGetCharEncodingHandler (encoding.c:1444)
==3979==    by 0x408346A: xmlAllocParserInputBuffer (xmlIO.c:2239)
==3979==    by 0x408415E: xmlParserInputBufferCreateIO (xmlIO.c:2836)
==3979==    by 0x4133230: xmlReaderForIO (xmlreader.c:5267)
==3979==    by 0x8048FC7: main (main.cpp:14)
==3979== 
==3979== 
==3979== 84 bytes in 1 blocks are still reachable in loss record 2 of 4
==3979==    at 0x4026FDE: malloc (vg_replace_malloc.c:207)
==3979==    by 0x40DDAA2: xmlNewRMutex (threads.c:291)
==3979==    by 0x4147587: xmlInitializeDict (dict.c:95)
==3979==    by 0x4147C42: xmlDictCreate (dict.c:327)
==3979==    by 0x4050597: xmlInitParserCtxt (parserInternals.c:1517)
==3979==    by 0x405101F: xmlNewParserCtxt (parserInternals.c:1768)
==3979==    by 0x406BA9F: xmlCreatePushParserCtxt (parser.c:10683)
==3979==    by 0x412E5FF: xmlNewTextReader (xmlreader.c:2075)
==3979==    by 0x4133252: xmlReaderForIO (xmlreader.c:5271)
==3979==    by 0x8048FC7: main (main.cpp:14)
==3979== 
==3979== 
==3979== 160 bytes in 8 blocks are still reachable in loss record 3 of 4
==3979==    at 0x4026FDE: malloc (vg_replace_malloc.c:207)
==3979==    by 0x404A844: xmlNewCharEncodingHandler (encoding.c:1295)
==3979==    by 0x404A984: xmlInitCharEncodingHandlers (encoding.c:1351)
==3979==    by 0x404AC35: xmlGetCharEncodingHandler (encoding.c:1444)
==3979==    by 0x408346A: xmlAllocParserInputBuffer (xmlIO.c:2239)
==3979==    by 0x408415E: xmlParserInputBufferCreateIO (xmlIO.c:2836)
==3979==    by 0x4133230: xmlReaderForIO (xmlreader.c:5267)
==3979==    by 0x8048FC7: main (main.cpp:14)
==3979== 
==3979== 
==3979== 200 bytes in 1 blocks are still reachable in loss record 4 of 4
==3979==    at 0x4026FDE: malloc (vg_replace_malloc.c:207)
==3979==    by 0x404A8F5: xmlInitCharEncodingHandlers (encoding.c:1337)
==3979==    by 0x404AC35: xmlGetCharEncodingHandler (encoding.c:1444)
==3979==    by 0x408346A: xmlAllocParserInputBuffer (xmlIO.c:2239)
==3979==    by 0x408415E: xmlParserInputBufferCreateIO (xmlIO.c:2836)
==3979==    by 0x4133230: xmlReaderForIO (xmlreader.c:5267)
==3979==    by 0x8048FC7: main (main.cpp:14)
==3979== 
==3979== LEAK SUMMARY:
==3979==    definitely lost: 0 bytes in 0 blocks.
==3979==      possibly lost: 0 bytes in 0 blocks.
==3979==    still reachable: 506 bytes in 18 blocks.
==3979==         suppressed: 0 bytes in 0 blocks.

で、該当の箇所をみてみると

  • xmlCleanupCharEncodingHandlers()
  • xmlDictCleanup()

で、メモリプールを解放出来ることが分かる

コードを修正

#include <iostream>
#include <sstream>
#include <boost/shared_ptr.hpp>
#include <libxml/xmlreader.h>

/// libxml2 input callback: copies up to `len` bytes from a std::istream into
/// `buf`.
///
/// @param in   Opaque I/O context; it is cast to std::istream*.
/// @param buf  Destination buffer with room for at least `len` bytes.
/// @param len  Maximum number of bytes to copy.
/// @return Number of bytes copied, 0 at end of input (or when `len` is not
///         positive), -1 on a null argument or a failed stream.
static int read(void* in, char* buf, int len)
{
    std::istream* const stream = static_cast<std::istream*>(in);
    if (!stream || !buf)
        return -1;
    if (len <= 0)
        return 0;

    // readsome() only hands out what is already buffered; how much that is
    // is implementation-defined and may be 0 while data is still pending,
    // which the parser would take for end of input. read() + gcount() keeps
    // reading until `len` bytes were delivered or the stream is exhausted.
    stream->read(buf, len);
    if (stream->bad())
        return -1;
    return static_cast<int>(stream->gcount());
}

int main()
{
    // 以下を追加
    struct Finalizer
    {   
        ~Finalizer()
        {   
            xmlCleanupCharEncodingHandlers();
            xmlDictCleanup();
        }   
    } finalizer;

    std::istringstream in("<a><b/>hoge<c/></a>");
    boost::shared_ptr<xmlTextReader> reader(xmlReaderForIO(read, NULL, &in, NULL, NULL, 0), xmlFreeTextReader);

    while (xmlTextReaderRead(reader.get()) == 1)
    {   
        std::cout << "node depth: " << xmlTextReaderDepth(reader.get()) << std::endl;
        std::cout << "node type : " << xmlTextReaderNodeType(reader.get()) << std::endl;

        const char* name = reinterpret_cast<const char*>(xmlTextReaderConstName(reader.get()));
        std::cout << "node name : " << (name ? name : "") << std::endl;

        const char* value = reinterpret_cast<const char*>(xmlTextReaderConstValue(reader.get()));
        std::cout << "node value: " << (value ? value : "") << std::endl;
        std::cout << std::endl;
    }   
}

で、再度コンパイルして valgrind する

これで、 still reachable なメモリがなくなりました。

$ g++ -g -lxml2 main.cpp && valgrind ./a.out

  (snip)

==4182== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 23 from 1)
==4182== malloc/free: in use at exit: 0 bytes in 0 blocks.
==4182== malloc/free: 52 allocs, 52 frees, 22,072 bytes allocated.
==4182== For counts of detected errors, rerun with: -v
==4182== All heap blocks were freed -- no leaks are possible.

このように

libxml2 では操作に対して適切に cleanup 系の関数を呼んでやれば、奇麗にメモリを解放することができるんですね。

まとめ

valgrind で多い日も安心!