Detect corrupt symbol files during minidump processing. Recover from the errors and use the good data if possible.

More specifically:
 - Detect corrupt symbols during minidump processing and provide the list of modules with corrupt symbols in the ProcessState.  This will allow listing the corrupt symbol files in the final crash report.
 - Skip and recover from symbol data parse errors - don't give up until 100 parse errors are seen.
 - In order to recover from '\0' (null terminator) in the middle of a symbol file, a couple of methods have to be updated to require both buffer pointer and length.  Previously they required only a buffer pointer (char *) and the size of the buffer was evaluated using strlen which is not reliable when the data is corrupt.  Most of the changes are due to these signature updates.
 - Added and updated unittests.

Also, updated minidump_stackwalk to show a WARNING for corrupt symbols.  Output looks like this:
...
Loaded modules:
0x000da000 - 0x000dafff  Google Chrome Canary  ???  (main)
0x000e0000 - 0x0417dfff  Google Chrome Framework  0.1500.0.3  (WARNING: Corrupt symbols, Google Chrome Framework, 4682A6B4136436C4BFECEB62D498020E0)
0x044a8000 - 0x04571fff  IOBluetooth  0.1.0.0
...
Review URL: https://breakpad.appspot.com/613002

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@1200 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
ivan.penkov@gmail.com 2013-07-11 01:36:06 +00:00
parent 92537aa818
commit 8819ab0844
34 changed files with 572 additions and 213 deletions

View file

@ -59,6 +59,7 @@ class BasicSourceLineResolver : public SourceLineResolverBase {
using SourceLineResolverBase::ShouldDeleteMemoryBufferAfterLoadModule;
using SourceLineResolverBase::UnloadModule;
using SourceLineResolverBase::HasModule;
using SourceLineResolverBase::IsModuleCorrupt;
using SourceLineResolverBase::FillSourceLineInfo;
using SourceLineResolverBase::FindWindowsFrameInfo;
using SourceLineResolverBase::FindCFIFrameInfo;

View file

@ -63,6 +63,7 @@ class FastSourceLineResolver : public SourceLineResolverBase {
using SourceLineResolverBase::FindCFIFrameInfo;
using SourceLineResolverBase::FindWindowsFrameInfo;
using SourceLineResolverBase::HasModule;
using SourceLineResolverBase::IsModuleCorrupt;
using SourceLineResolverBase::LoadModule;
using SourceLineResolverBase::LoadModuleUsingMapBuffer;
using SourceLineResolverBase::LoadModuleUsingMemoryBuffer;

View file

@ -109,6 +109,9 @@ class ProcessState {
// Returns a pointer to this object's list of modules that didn't have
// symbols when the report was processed.
const vector<const CodeModule*>* modules_without_symbols() const {
return &modules_without_symbols_;
}
// Returns a pointer to this object's list of modules that had corrupt
// symbols when the report was processed.
const vector<const CodeModule*>* modules_with_corrupt_symbols() const {
return &modules_with_corrupt_symbols_;
}
// Returns the stored exploitability rating (the value of exploitability_).
ExploitabilityRating exploitability() const { return exploitability_; }
private:
@ -164,6 +167,9 @@ class ProcessState {
// The modules that didn't have symbols when the report was processed.
vector<const CodeModule*> modules_without_symbols_;
// The modules that had corrupt symbols when the report was processed.
vector<const CodeModule*> modules_with_corrupt_symbols_;
// The exploitability rating as determined by the exploitability
// engine. When the exploitability engine is not enabled this
// defaults to EXPLOITABILITY_NONE.

View file

@ -42,6 +42,7 @@
#define GOOGLE_BREAKPAD_PROCESSOR_SOURCE_LINE_RESOLVER_BASE_H__
#include <map>
#include <set>
#include <string>
#include "google_breakpad/processor/source_line_resolver_interface.h"
@ -49,6 +50,7 @@
namespace google_breakpad {
using std::map;
using std::set;
// Forward declaration.
// ModuleFactory is a simple factory interface for creating a Module instance
@ -62,7 +64,9 @@ class SourceLineResolverBase : public SourceLineResolverInterface {
// LoadMap() method.
// Place dynamically allocated heap buffer in symbol_data. Caller has the
// ownership of the buffer, and should call delete [] to free the buffer.
static bool ReadSymbolFile(char **symbol_data, const string &file_name);
static bool ReadSymbolFile(const string &file_name,
char **symbol_data,
size_t *symbol_data_size);
protected:
// Users are not allowed to create a SourceLineResolverBase instance directly.
@ -74,10 +78,12 @@ class SourceLineResolverBase : public SourceLineResolverInterface {
virtual bool LoadModuleUsingMapBuffer(const CodeModule *module,
const string &map_buffer);
virtual bool LoadModuleUsingMemoryBuffer(const CodeModule *module,
char *memory_buffer);
char *memory_buffer,
size_t memory_buffer_size);
virtual bool ShouldDeleteMemoryBufferAfterLoadModule();
virtual void UnloadModule(const CodeModule *module);
virtual bool HasModule(const CodeModule *module);
virtual bool IsModuleCorrupt(const CodeModule *module);
virtual void FillSourceLineInfo(StackFrame *frame);
virtual WindowsFrameInfo *FindWindowsFrameInfo(const StackFrame *frame);
virtual CFIFrameInfo *FindCFIFrameInfo(const StackFrame *frame);
@ -97,6 +103,10 @@ class SourceLineResolverBase : public SourceLineResolverInterface {
typedef map<string, Module*, CompareString> ModuleMap;
ModuleMap *modules_;
// The loaded modules that were detected to be corrupt during load.
typedef set<string, CompareString> ModuleSet;
ModuleSet *corrupt_modules_;
// All of the heap-allocated buffers that are owned locally by the resolver.
typedef std::map<string, char*, CompareString> MemoryMap;
MemoryMap *memory_buffers_;

View file

@ -64,12 +64,15 @@ class SourceLineResolverInterface {
virtual bool LoadModuleUsingMapBuffer(const CodeModule *module,
const string &map_buffer) = 0;
// Add an interface to load symbol using C-String data insteading string.
// Add an interface to load symbol using C-String data instead of string.
// This is useful in the optimization design for avoiding unnecessary copying
// of symbol data, in order to improve memory efficiency.
// LoadModuleUsingMemoryBuffer() does NOT take ownership of memory_buffer.
// LoadModuleUsingMemoryBuffer() null-terminates the passed-in buffer if
// its last character is not already a null terminator.
virtual bool LoadModuleUsingMemoryBuffer(const CodeModule *module,
char *memory_buffer) = 0;
char *memory_buffer,
size_t memory_buffer_size) = 0;
// Return true if the memory buffer should be deleted immediately after
// LoadModuleUsingMemoryBuffer(). Return false if the memory buffer has to be
@ -83,6 +86,9 @@ class SourceLineResolverInterface {
// Returns true if the module has been loaded.
virtual bool HasModule(const CodeModule *module) = 0;
// Returns true if the module has been loaded and it is corrupt.
virtual bool IsModuleCorrupt(const CodeModule *module) = 0;
// Fills in the function_base, function_name, source_file_name,
// and source_line fields of the StackFrame. The instruction and
// module_name fields must already be filled in.

View file

@ -62,7 +62,10 @@ class StackFrameSymbolizer {
kError,
// This indicates error for which stack walk should be interrupted
// and retried in future.
kInterrupt
kInterrupt,
// Symbol data was found and loaded in the resolver; however, some
// corruption was detected.
kWarningCorruptSymbols,
};
StackFrameSymbolizer(SymbolSupplier* supplier,

View file

@ -68,15 +68,18 @@ class Stackwalker {
// GetCallerFrame. The frames are further processed to fill all available
// data. Returns true if the stackwalk completed, or false if it was
// interrupted by SymbolSupplier::GetSymbolFile().
// Upon return, modules_without_symbols will be populated with pointers to
// Upon return, |modules_without_symbols| will be populated with pointers to
// the code modules (CodeModule*) that DON'T have symbols.
// modules_without_symbols DOES NOT take ownership of the code modules.
// |modules_with_corrupt_symbols| will be populated with pointers to the
// modules which have corrupt symbols. |modules_without_symbols| and
// |modules_with_corrupt_symbols| DO NOT take ownership of the code modules.
// The lifetime of these code modules is the same as the lifetime of the
// CodeModules passed to the StackWalker constructor (which currently
// happens to be the lifetime of Breakpad's ProcessState object).
// There is a check for duplicate modules so no duplicates are expected.
bool Walk(CallStack* stack,
vector<const CodeModule*>* modules_without_symbols);
vector<const CodeModule*>* modules_without_symbols,
vector<const CodeModule*>* modules_with_corrupt_symbols);
// Returns a new concrete subclass suitable for the CPU that a stack was
// generated on, according to the CPU type indicated by the context

View file

@ -87,7 +87,8 @@ class SymbolSupplier {
virtual SymbolResult GetCStringSymbolData(const CodeModule *module,
const SystemInfo *system_info,
string *symbol_file,
char **symbol_data) = 0;
char **symbol_data,
size_t *symbol_data_size) = 0;
// Frees the data buffer allocated for the module in GetCStringSymbolData.
virtual void FreeSymbolData(const CodeModule *module) = 0;