Add optional field indicating multiple symbols at an address

Adds an optional 'm' as the first field in FUNCTION and PUBLIC records
to indicate that the address corresponds to more than one symbol.
Controls this by a command line flag for now to give symbol file users
a chance to update.

Also reduces the number of IDiaSymbols retained in memory to one per
address. This reduces memory consumption by 8% when processing
chrome.dll.pdb.

Updates the processor to parse the new optional field.

Bug: google-breakpad:751
Change-Id: I6503edaf057312d21a1d63d9c84e5a4fa019dc46
Reviewed-on: https://chromium-review.googlesource.com/773418
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
Mike Wittman 2017-11-29 13:29:37 -08:00 committed by Mark Mentovai
parent 4eeb384f3e
commit b1226959a2
12 changed files with 324 additions and 148 deletions

View file

@ -62,6 +62,42 @@ namespace google_breakpad {
#define strtoull _strtoui64
#endif
namespace {

// Tokenizes |line| when its first field is optional.
//
// |optional_field| is the exact text expected for the optional leading field
// and |max_tokens| is the maximum number of tokens *including* that field.
// |separators| and |tokens| are forwarded to Tokenize; refer to the
// documentation for Tokenize for their meaning.
//
// Returns false if either underlying Tokenize call fails. When the optional
// field is present it is left as the first entry of |tokens|, so callers can
// detect it by comparing tokens[0] against |optional_field|.
bool TokenizeWithOptionalField(char *line,
                               const char *optional_field,
                               const char *separators,
                               int max_tokens,
                               vector<char*> *tokens) {
  // First tokenize assuming the optional field is not present, i.e. ask for
  // one token fewer than the maximum.
  if (!Tokenize(line, separators, max_tokens - 1, tokens)) {
    return false;
  }

  // front() on an empty vector is undefined behavior, so only look at the
  // first token when there is one. With no tokens there is no optional field
  // to expand and the result of the first pass stands.
  if (tokens->empty() || strcmp(tokens->front(), optional_field) != 0) {
    return true;
  }

  // The optional field is present, which means the last token produced above
  // still holds two fields. Split it in two to recover the field prior to the
  // last.
  vector<char*> last_tokens;
  if (!Tokenize(tokens->back(), separators, 2, &last_tokens) ||
      last_tokens.size() < 2) {
    return false;
  }

  // Replace the previous last token with the two new tokens.
  tokens->pop_back();
  tokens->push_back(last_tokens[0]);
  tokens->push_back(last_tokens[1]);
  return true;
}

}  // namespace
static const char *kWhitespace = " \r\n";
static const int kMaxErrorsPrinted = 5;
static const int kMaxErrorsBeforeBailing = 100;
@ -323,13 +359,14 @@ bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
// Parses one FUNC record by delegating to SymbolParseHelper::ParseFunction
// and, when that succeeds, returns a newly allocated Function built from the
// parsed fields. Returns NULL when the helper reports failure.
// NOTE(review): the result comes from a bare `new`; presumably the caller
// takes ownership -- confirm at the call site.
// NOTE(review): this is a rendered diff hunk. The ParseFunction call without
// &is_multiple and the four-argument Function construction are the pre-change
// lines; each is immediately followed by its replacement.
BasicSourceLineResolver::Function*
BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
// Outputs filled in by SymbolParseHelper::ParseFunction.
bool is_multiple;
uint64_t address;
uint64_t size;
long stack_param_size;
char *name;
if (SymbolParseHelper::ParseFunction(function_line, &address, &size,
&stack_param_size, &name)) {
return new Function(name, address, size, stack_param_size);
if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address,
&size, &stack_param_size, &name)) {
// Carry the "multiple symbols at this address" flag into the Function.
return new Function(name, address, size, stack_param_size, is_multiple);
}
// The record could not be parsed.
return NULL;
}
@ -349,11 +386,12 @@ BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
}
bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
bool is_multiple;
uint64_t address;
long stack_param_size;
char *name;
if (SymbolParseHelper::ParsePublicSymbol(public_line, &address,
if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address,
&stack_param_size, &name)) {
// A few public symbols show up with an address of 0. This has been seen
// in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
@ -366,7 +404,8 @@ bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
}
linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
stack_param_size));
stack_param_size,
is_multiple));
return public_symbols_.Store(address, symbol);
}
return false;
@ -491,36 +530,39 @@ bool SymbolParseHelper::ParseFile(char *file_line, long *index,
}
// static
// Parses a FUNC record into its fields.
//
// |function_line| must start with "FUNC " (asserted). After the prefix the
// record is tokenized on kWhitespace with an optional leading "m" field;
// *is_multiple reports whether that field was present. The address, size and
// stack parameter size are all parsed as base-16 numbers.
//
// Returns false if tokenizing fails, if IsValidAfterNumber rejects the text
// following a number, if a parsed value equals the maximum of its type (the
// value strtoull/strtol return on overflow), or if the stack parameter size
// is negative. Outputs assigned before the failing field keep the values
// written to them.
// NOTE(review): *name presumably points into |function_line|'s buffer rather
// than a copy -- confirm against Tokenize.
// NOTE(review): this is a rendered diff hunk. Where two near-identical lines
// are adjacent (signature, format comment, tokenize call, each tokens[...]
// read), the first is the pre-change line and the second its replacement.
bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address,
uint64_t *size, long *stack_param_size,
char **name) {
// FUNC <address> <size> <stack_param_size> <name>
bool SymbolParseHelper::ParseFunction(char *function_line, bool *is_multiple,
uint64_t *address, uint64_t *size,
long *stack_param_size, char **name) {
// FUNC [<multiple>] <address> <size> <stack_param_size> <name>
assert(strncmp(function_line, "FUNC ", 5) == 0);
function_line += 5; // skip prefix
vector<char*> tokens;
if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
// Up to 5 tokens: the optional "m" plus the four mandatory fields.
if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) {
return false;
}
// The optional field, when present, is always the first token.
*is_multiple = strcmp(tokens[0], "m") == 0;
// Index of the next mandatory field; skips the "m" token when it is there.
int next_token = *is_multiple ? 1 : 0;
char *after_number;
*address = strtoull(tokens[0], &after_number, 16);
*address = strtoull(tokens[next_token++], &after_number, 16);
// Reject trailing junk and the value strtoull returns on overflow.
if (!IsValidAfterNumber(after_number) ||
*address == std::numeric_limits<unsigned long long>::max()) {
return false;
}
*size = strtoull(tokens[1], &after_number, 16);
*size = strtoull(tokens[next_token++], &after_number, 16);
if (!IsValidAfterNumber(after_number) ||
*size == std::numeric_limits<unsigned long long>::max()) {
return false;
}
*stack_param_size = strtol(tokens[2], &after_number, 16);
*stack_param_size = strtol(tokens[next_token++], &after_number, 16);
// strtol is signed, so a negative size must be rejected explicitly.
if (!IsValidAfterNumber(after_number) ||
*stack_param_size == std::numeric_limits<long>::max() ||
*stack_param_size < 0) {
return false;
}
*name = tokens[3];
// The final token is the function name.
*name = tokens[next_token++];
return true;
}
@ -571,32 +613,35 @@ bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address,
}
// static
// Parses a PUBLIC record into its fields.
//
// |public_line| must start with "PUBLIC " (asserted). After the prefix the
// record is tokenized on kWhitespace with an optional leading "m" field;
// *is_multiple reports whether that field was present. The address and stack
// parameter size are parsed as base-16 numbers.
//
// Returns false if tokenizing fails, if IsValidAfterNumber rejects the text
// following a number, if a parsed value equals the maximum of its type (the
// value strtoull/strtol return on overflow), or if the stack parameter size
// is negative. Outputs assigned before the failing field keep the values
// written to them.
// NOTE(review): *name presumably points into |public_line|'s buffer rather
// than a copy -- confirm against Tokenize.
// NOTE(review): this is a rendered diff hunk. Where two near-identical lines
// are adjacent (signature start, format comment, tokenize call, each
// tokens[...] read), the first is the pre-change line and the second its
// replacement.
bool SymbolParseHelper::ParsePublicSymbol(char *public_line,
bool SymbolParseHelper::ParsePublicSymbol(char *public_line, bool *is_multiple,
uint64_t *address,
long *stack_param_size,
char **name) {
// PUBLIC <address> <stack_param_size> <name>
// PUBLIC [<multiple>] <address> <stack_param_size> <name>
assert(strncmp(public_line, "PUBLIC ", 7) == 0);
public_line += 7; // skip prefix
vector<char*> tokens;
if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
// Up to 4 tokens: the optional "m" plus the three mandatory fields.
if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) {
return false;
}
// The optional field, when present, is always the first token.
*is_multiple = strcmp(tokens[0], "m") == 0;
// Index of the next mandatory field; skips the "m" token when it is there.
int next_token = *is_multiple ? 1 : 0;
char *after_number;
*address = strtoull(tokens[0], &after_number, 16);
*address = strtoull(tokens[next_token++], &after_number, 16);
// Reject trailing junk and the value strtoull returns on overflow.
if (!IsValidAfterNumber(after_number) ||
*address == std::numeric_limits<unsigned long long>::max()) {
return false;
}
*stack_param_size = strtol(tokens[1], &after_number, 16);
*stack_param_size = strtol(tokens[next_token++], &after_number, 16);
// strtol is signed, so a negative size must be rejected explicitly.
if (!IsValidAfterNumber(after_number) ||
*stack_param_size == std::numeric_limits<long>::max() ||
*stack_param_size < 0) {
return false;
}
*name = tokens[2];
// The final token is the symbol name.
*name = tokens[next_token++];
return true;
}

View file

@ -60,11 +60,13 @@ BasicSourceLineResolver::Function : public SourceLineResolverBase::Function {
Function(const string &function_name,
MemAddr function_address,
MemAddr code_size,
int set_parameter_size) : Base(function_name,
function_address,
code_size,
set_parameter_size),
lines() { }
int set_parameter_size,
bool is_mutiple) : Base(function_name,
function_address,
code_size,
set_parameter_size,
is_mutiple),
lines() { }
RangeMap< MemAddr, linked_ptr<Line> > lines;
private:
typedef SourceLineResolverBase::Function Base;

View file

@ -455,16 +455,19 @@ TEST(SymbolParseHelper, ParseFileInvalid) {
}
// Test parsing of valid FUNC lines. The format is:
// FUNC <address> <size> <stack_param_size> <name>
// FUNC [<multiple>] <address> <size> <stack_param_size> <name>
TEST(SymbolParseHelper, ParseFunctionValid) {
bool multiple;
uint64_t address;
uint64_t size;
long stack_param_size;
char *name;
char kTestLine[] = "FUNC 1 2 3 function name";
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address,
&size, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(1ULL, address);
EXPECT_EQ(2ULL, size);
EXPECT_EQ(3, stack_param_size);
@ -472,25 +475,41 @@ TEST(SymbolParseHelper, ParseFunctionValid) {
// Test hex address, size, and param size.
char kTestLine1[] = "FUNC a1 a2 a3 function name";
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address,
&size, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(0xa1ULL, address);
EXPECT_EQ(0xa2ULL, size);
EXPECT_EQ(0xa3, stack_param_size);
EXPECT_EQ("function name", string(name));
char kTestLine2[] = "FUNC 0 0 0 function name";
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address,
&size, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(0ULL, address);
EXPECT_EQ(0ULL, size);
EXPECT_EQ(0, stack_param_size);
EXPECT_EQ("function name", string(name));
// Test optional multiple field.
char kTestLine3[] = "FUNC m a1 a2 a3 function name";
ASSERT_TRUE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address,
&size, &stack_param_size,
&name));
EXPECT_TRUE(multiple);
EXPECT_EQ(0xa1ULL, address);
EXPECT_EQ(0xa2ULL, size);
EXPECT_EQ(0xa3, stack_param_size);
EXPECT_EQ("function name", string(name));
}
// Test parsing of invalid FUNC lines. The format is:
// FUNC <address> <size> <stack_param_size> <name>
// FUNC [<multiple>] <address> <size> <stack_param_size> <name>
TEST(SymbolParseHelper, ParseFunctionInvalid) {
bool multiple;
uint64_t address;
uint64_t size;
long stack_param_size;
@ -498,36 +517,49 @@ TEST(SymbolParseHelper, ParseFunctionInvalid) {
// Test missing function name.
char kTestLine[] = "FUNC 1 2 3 ";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine, &multiple, &address,
&size, &stack_param_size,
&name));
// Test bad address.
char kTestLine1[] = "FUNC 1z 2 3 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine1, &multiple, &address,
&size, &stack_param_size,
&name));
// Test large address.
char kTestLine2[] = "FUNC 123123123123123123123123123 2 3 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine2, &multiple, &address,
&size, &stack_param_size,
&name));
// Test bad size.
char kTestLine3[] = "FUNC 1 z2 3 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine3, &multiple, &address,
&size, &stack_param_size,
&name));
// Test large size.
char kTestLine4[] = "FUNC 1 231231231231231231231231232 3 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine4, &multiple, &address,
&size, &stack_param_size,
&name));
// Test bad param size.
char kTestLine5[] = "FUNC 1 2 3z function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine5, &multiple, &address,
&size, &stack_param_size,
&name));
// Test large param size.
char kTestLine6[] = "FUNC 1 2 312312312312312312312312323 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine6, &multiple, &address,
&size, &stack_param_size,
&name));
// Negative param size.
char kTestLine7[] = "FUNC 1 2 -5 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &address, &size,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine7, &multiple, &address,
&size, &stack_param_size,
&name));
// Test invalid optional field.
char kTestLine8[] = "FUNC x 1 2 5 function name";
ASSERT_FALSE(SymbolParseHelper::ParseFunction(kTestLine8, &multiple, &address,
&size, &stack_param_size,
&name));
}
// Test parsing of valid lines. The format is:
@ -612,67 +644,96 @@ TEST(SymbolParseHelper, ParseLineInvalid) {
}
// Test parsing of valid PUBLIC lines. The format is:
// PUBLIC <address> <stack_param_size> <name>
// PUBLIC [<multiple>] <address> <stack_param_size> <name>
// Checks that well-formed PUBLIC records parse successfully and that every
// output, including the optional-field flag, has the expected value.
// NOTE(review): this is a rendered diff hunk. Each ASSERT_TRUE that does not
// pass &multiple is the pre-change call, followed by its replacement.
TEST(SymbolParseHelper, ParsePublicSymbolValid) {
bool multiple;
uint64_t address;
long stack_param_size;
char *name;
// Minimal record without the optional field; the name is the token "3".
char kTestLine[] = "PUBLIC 1 2 3";
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple,
&address, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(1ULL, address);
EXPECT_EQ(2, stack_param_size);
EXPECT_EQ("3", string(name));
// Test hex size and address.
// The expected name shows that a name may contain spaces.
char kTestLine1[] = "PUBLIC a1 a2 function name";
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple,
&address, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(0xa1ULL, address);
EXPECT_EQ(0xa2, stack_param_size);
EXPECT_EQ("function name", string(name));
// Test 0 is a valid address.
char kTestLine2[] = "PUBLIC 0 a2 function name";
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address,
&stack_param_size, &name));
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple,
&address, &stack_param_size,
&name));
EXPECT_FALSE(multiple);
EXPECT_EQ(0ULL, address);
EXPECT_EQ(0xa2, stack_param_size);
EXPECT_EQ("function name", string(name));
// Test optional multiple field.
// A leading "m" must set the flag and leave the other fields unchanged.
char kTestLine3[] = "PUBLIC m a1 a2 function name";
ASSERT_TRUE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple,
&address, &stack_param_size,
&name));
EXPECT_TRUE(multiple);
EXPECT_EQ(0xa1ULL, address);
EXPECT_EQ(0xa2, stack_param_size);
EXPECT_EQ("function name", string(name));
}
// Test parsing of invalid PUBLIC lines. The format is:
// PUBLIC <address> <stack_param_size> <name>
// PUBLIC [<multiple>] <address> <stack_param_size> <name>
// Checks that malformed PUBLIC records are rejected. Every case expects
// ParsePublicSymbol to return false; the output values are not inspected.
// NOTE(review): this is a rendered diff hunk. Each ASSERT_FALSE that does not
// pass &multiple is the pre-change call, followed by its replacement.
TEST(SymbolParseHelper, ParsePublicSymbolInvalid) {
bool multiple;
uint64_t address;
long stack_param_size;
char *name;
// Test missing source function name.
char kTestLine[] = "PUBLIC 1 2 ";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine, &multiple,
&address, &stack_param_size,
&name));
// Test bad address.
char kTestLine1[] = "PUBLIC 1z 2 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine1, &multiple,
&address, &stack_param_size,
&name));
// Test large address.
char kTestLine2[] = "PUBLIC 123123123123123123123123 2 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine2, &multiple,
&address, &stack_param_size,
&name));
// Test bad param stack size.
char kTestLine3[] = "PUBLIC 1 z2 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine3, &multiple,
&address, &stack_param_size,
&name));
// Test large param stack size.
char kTestLine4[] = "PUBLIC 1 123123123123123123123123123 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine4, &multiple,
&address, &stack_param_size,
&name));
// Test negative param stack size.
char kTestLine5[] = "PUBLIC 1 -5 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &address,
&stack_param_size, &name));
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine5, &multiple,
&address, &stack_param_size,
&name));
// Test invalid optional field.
// The leading "x" is not the "m" marker the parser looks for.
char kTestLine6[] = "PUBLIC x 1 5 3";
ASSERT_FALSE(SymbolParseHelper::ParsePublicSymbol(kTestLine6, &multiple,
&address, &stack_param_size,
&name));
}
} // namespace

View file

@ -85,9 +85,10 @@ struct SourceLineResolverBase::Function {
Function(const string &function_name,
MemAddr function_address,
MemAddr code_size,
int set_parameter_size)
int set_parameter_size,
bool is_multiple)
: name(function_name), address(function_address), size(code_size),
parameter_size(set_parameter_size) { }
parameter_size(set_parameter_size), is_multiple(is_multiple) { }
string name;
MemAddr address;
@ -95,16 +96,21 @@ struct SourceLineResolverBase::Function {
// The size of parameters passed to this function on the stack.
int32_t parameter_size;
// If the function's instructions correspond to multiple symbols.
bool is_multiple;
};
struct SourceLineResolverBase::PublicSymbol {
PublicSymbol() { }
PublicSymbol(const string& set_name,
MemAddr set_address,
int set_parameter_size)
int set_parameter_size,
bool is_multiple)
: name(set_name),
address(set_address),
parameter_size(set_parameter_size) {}
parameter_size(set_parameter_size),
is_multiple(is_multiple) {}
string name;
MemAddr address;
@ -113,6 +119,9 @@ struct SourceLineResolverBase::PublicSymbol {
// is set to the size of the parameters passed to the function on the
// stack, if known.
int32_t parameter_size;
// If the public symbol's address corresponds to multiple symbols.
bool is_multiple;
};
class SourceLineResolverBase::Module {