CBMC
Loading...
Searching...
No Matches
gdb_api.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: GDB Machine Interface API
4
5Author: Malte Mues <mail.mues@gmail.com>
6 Daniel Poetzl
7
8\*******************************************************************/
9
14
15#include <cerrno>
16#include <cstdio>
17#include <cstring>
18#include <regex>
19
20#include "gdb_api.h"
21
22#include <util/prefix.h>
23#include <util/string2int.h>
24#include <util/string_utils.h>
25
26#include <sys/wait.h>
27
28gdb_apit::gdb_apit(const std::vector<std::string> &args, const bool log)
29 : args(args), log(log), gdb_state(gdb_statet::NOT_CREATED)
30{
31}
32
34{
38
40 return;
41
42 write_to_gdb("-gdb-exit");
43 // we cannot use most_recent_line_has_tag() here as it checks the last line
44 // before the next `(gdb) \n` prompt in the output; however when gdb exits no
45 // next prompt is printed
47
49
52
53 wait(NULL);
54}
55
56size_t gdb_apit::query_malloc_size(const std::string &pointer_expr)
57{
58 const auto maybe_address_string = get_value(pointer_expr);
60
62 return 1;
63 else
65}
66
68{
70
71 command_log.clear();
72
74
75 int pipe_input[2];
76 int pipe_output[2];
77
78 if(pipe(pipe_input) == -1)
79 {
80 throw gdb_interaction_exceptiont("could not create pipe for stdin");
81 }
82
83 if(pipe(pipe_output) == -1)
84 {
85 throw gdb_interaction_exceptiont("could not create pipe for stdout");
86 }
87
88 gdb_process = fork();
89
90 if(gdb_process == -1)
91 {
92 throw gdb_interaction_exceptiont("could not create gdb process");
93 }
94
95 if(gdb_process == 0)
96 {
97 // child process
98 close(pipe_input[1]);
99 close(pipe_output[0]);
100
101 dup2(pipe_input[0], STDIN_FILENO);
102 dup2(pipe_output[1], STDOUT_FILENO);
103 dup2(pipe_output[1], STDERR_FILENO);
104
105 dprintf(pipe_output[1], "binary name: %s\n", args.front().c_str());
106
107 std::vector<std::string> exec_cmd;
108 exec_cmd.reserve(args.size() + 3);
109 exec_cmd.push_back("gdb");
110 exec_cmd.push_back("--interpreter=mi");
111 exec_cmd.push_back("--args");
112 exec_cmd.insert(exec_cmd.end(), args.begin(), args.end());
113
114 char **exec_cmd_ptr = static_cast<char **>(malloc(
115 sizeof(char *) * (exec_cmd.size() + 1)));
116 exec_cmd_ptr[exec_cmd.size()] = NULL;
117
118 for(std::size_t i = 0; i < exec_cmd.size(); i++)
119 {
120 exec_cmd_ptr[i] = static_cast<char *>(malloc(
121 sizeof(char) * (exec_cmd[i].length() + 1)));
122 strcpy(exec_cmd_ptr[i], exec_cmd[i].c_str()); // NOLINT(runtime/printf)
123 }
124
125 dprintf(pipe_output[1], "Loading gdb...\n");
126 execvp("gdb", exec_cmd_ptr);
127
128 // Only reachable, if execvp failed
129 int errno_value = errno;
130 dprintf(pipe_output[1], "Starting gdb failed: %s\n", strerror(errno_value));
131 dprintf(pipe_output[1], "(gdb) \n");
132 throw gdb_interaction_exceptiont("could not run gdb");
133 }
134 else
135 {
136 // parent process
137 close(pipe_input[0]);
138 close(pipe_output[1]);
139
140 // get stream for reading the gdb output
141 response_stream = fdopen(pipe_output[0], "r");
142
143 // get stream for writing to gdb
144 command_stream = fdopen(pipe_input[1], "w");
145
146 std::string line = read_most_recent_line();
147 if(has_prefix(line, "Starting gdb failed:"))
148 throw gdb_interaction_exceptiont(line);
149
151
153 has_prefix(line, R"(~"done)") ||
154 has_prefix(line, R"(~"Reading)"));
155
156 if(log)
157 {
158 // logs output to `gdb.txt` in the current directory, input is not logged
159 // hence we log it to `command_log`
160 write_to_gdb("-gdb-set logging on");
162 }
163
164 write_to_gdb("-gdb-set max-value-size unlimited");
166 }
167}
168
/// Send a single command line to gdb through `command_stream`.
/// \param command: the command text; must be non-empty and must not contain
///   a newline (the terminating newline is appended here)
/// Throws gdb_interaction_exceptiont when `fputs` reports EOF, i.e. the
/// command could not be written.
169void gdb_apit::write_to_gdb(const std::string &command)
170{
171 PRECONDITION(!command.empty());
172 PRECONDITION(command.find('\n') == std::string::npos);
173
// gdb reads line by line, so terminate the command with a newline
174 std::string line(command);
175 line += '\n';
176
// the command log stores the command without the trailing newline, newest
// entry first
177 if(log)
178 {
179 command_log.push_front(command);
180 }
181
182 if(fputs(line.c_str(), command_stream) == EOF)
183 {
184 throw gdb_interaction_exceptiont("could not write a command to gdb");
185 }
186
// NOTE(review): listing line 187 is absent from this extract - presumably the
// stream is flushed here; confirm against the real file
188}
189
195
197{
198 std::string result;
199
200 do
201 {
202 const size_t buf_size = 1024;
203 char buf[buf_size]; // NOLINT(runtime/arrays)
204
205 const char *c = fgets(buf, buf_size, response_stream);
206
207 if(c == NULL)
208 {
210 {
211 throw gdb_interaction_exceptiont("error reading from gdb");
212 }
213
214 INVARIANT(
216 "EOF must have been reached when the error indicator on the stream "
217 "is not set and fgets returned NULL");
218 INVARIANT(
219 result.empty() || result.back() != '\n',
220 "when EOF is reached then either no characters were read or the string"
221 " read does not end in a newline");
222
223 return result;
224 }
225
226 std::string chunk(buf);
227 INVARIANT(!chunk.empty(), "chunk cannot be empty when EOF was not reached");
228
229 result += chunk;
230 } while(result.back() != '\n');
231
232 return result;
233}
234
236{
237 std::string line;
238 std::string output;
239
240 do
241 {
242 output = line;
243 line = read_next_line();
244 } while(line != "(gdb) \n");
245
246 return output;
247}
248
250gdb_apit::get_most_recent_record(const std::string &tag, const bool must_exist)
251{
252 std::string line = read_most_recent_line();
253 const bool b = has_prefix(line, tag);
254
255 if(must_exist)
256 {
258 }
259 else if(!b)
260 {
261 throw gdb_interaction_exceptiont("record does not exist");
262 }
263
264 std::string record = strip_string(line.substr(line.find(',') + 1));
265
266 return parse_gdb_output_record(record);
267}
268
269bool gdb_apit::most_recent_line_has_tag(const std::string &tag)
270{
271 const std::string line = read_most_recent_line();
272 return has_prefix(line, tag);
273}
274
275void gdb_apit::run_gdb_from_core(const std::string &corefile)
276{
278
279 // there does not seem to be a gdb mi command to run from a core file
280 const std::string command = "core " + corefile;
281
282 write_to_gdb(command);
284
286}
287
289{
290#if defined(__x86_64__)
291 // this is what the registers look like at the function call entry for x86-64:
292 //
293 // reg. name hex. value dec. value
294 // 0: rax 0xffffffff 4294967295
295 // 1: rbx 0x20000000 536870912
296 // 2: rcx 0x591 1425
297 // 3: rdx 0x591 1425
298 // 4: rsi 0x1 1
299 // 5: rdi 0x591 1425
300 // ...
301 // rax will eventually contain the return value and
302 // rdi now stores the first (integer) argument
303 // in the machine interface they are referred to by numbers, hence:
304 write_to_gdb("-data-list-register-values d 5");
305 auto record = get_most_recent_record("^done", true);
307#elif defined(__i386__)
308 // x86 32-bit Linux calling conventions use the stack to pass arguments. The
309 // top of the stack is the return address, so look at the next element (+4 as
310 // the stack grows downwards).
311 write_to_gdb("-data-evaluate-expression \"*(unsigned long*)($esp + 4)\"");
312 auto record = get_most_recent_record("^done", true);
313 auto allocated_size =
315#else
316# error malloc calling conventions not know for current platform
317#endif
318
319 write_to_gdb("-exec-finish");
320 if(!most_recent_line_has_tag("*running"))
321 {
322 throw gdb_interaction_exceptiont("could not run program");
323 }
324 record = get_most_recent_record("*stopped");
325 auto frame_content = get_value_from_record(record, "frame");
326
327 // the malloc breakpoint may be inside another malloc function
328 if(frame_content.find("func=\"malloc\"") != std::string::npos)
329 {
330 // so we need to finish the outer malloc as well
331 write_to_gdb("-exec-finish");
332 if(!most_recent_line_has_tag("*running"))
333 {
334 throw gdb_interaction_exceptiont("could not run program");
335 }
336 record = get_most_recent_record("*stopped");
337 }
338
339 // now we can read the eax/rax register to the allocated memory address
340 write_to_gdb("-data-list-register-values x 0");
341 record = get_most_recent_record("^done", true);
343}
344
346{
348
349 write_to_gdb("-break-insert " + malloc_name);
351
352 std::string command("-break-insert");
353 command += " " + breakpoint;
354
355 write_to_gdb(command);
357 {
358 throw gdb_interaction_exceptiont("could not set breakpoint");
359 }
360
361 write_to_gdb("-exec-run");
362
363 if(!most_recent_line_has_tag("*running"))
364 {
365 throw gdb_interaction_exceptiont("could not run program");
366 }
367
368 gdb_output_recordt record = get_most_recent_record("*stopped");
369
370 // malloc function is known, i.e. present among the symbols
372 {
373 // stop at every entry into malloc call
374 while(hit_malloc_breakpoint(record))
375 {
376 // and store the information about the allocated memory
378 write_to_gdb("-exec-continue");
379 if(!most_recent_line_has_tag("*running"))
380 {
381 throw gdb_interaction_exceptiont("could not run program");
382 }
383 record = get_most_recent_record("*stopped");
384 }
385
386 write_to_gdb("-break-delete 1");
388 {
389 throw gdb_interaction_exceptiont("could not delete breakpoint at malloc");
390 }
391 }
392
393 const auto it = record.find("reason");
394 CHECK_RETURN(it != record.end());
395
396 const std::string &reason = it->second;
397
398 if(reason == "breakpoint-hit")
399 {
401 return true;
402 }
403 else if(reason == "exited-normally")
404 {
405 return false;
406 }
407 else
408 {
410 "gdb stopped for unhandled reason `" + reason + "`");
411 }
412
414}
415
416std::string gdb_apit::eval_expr(const std::string &expr)
417{
418 write_to_gdb("-var-create tmp * " + expr);
419
421 {
423 "could not create variable for expression `" + expr + "`");
424 }
425
426 write_to_gdb("-var-evaluate-expression tmp");
427 gdb_output_recordt record = get_most_recent_record("^done", true);
428
429 write_to_gdb("-var-delete tmp");
431
432 const auto it = record.find("value");
433 CHECK_RETURN(it != record.end());
434
435 const std::string value = it->second;
436
437 INVARIANT(
438 value.back() != '"' ||
439 (value.length() >= 2 && value[value.length() - 2] == '\\'),
440 "quotes should have been stripped off from value");
441 INVARIANT(value.back() != '\n', "value should not end in a newline");
442
443 return value;
444}
445
446gdb_apit::pointer_valuet gdb_apit::get_memory(const std::string &expr)
447{
449
450 std::string value;
451 try
452 {
453 value = eval_expr(expr);
454 }
456 {
457 return pointer_valuet{};
458 }
459
460 std::regex regex(
461 r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
462
463 std::smatch result;
464 const bool b = regex_match(value, result, regex);
465 if(!b)
466 return pointer_valuet{};
467
468 std::optional<std::string> opt_string;
469 const std::string string = result[4];
470
471 if(!string.empty())
472 {
473 const std::size_t len = string.length();
474
475 INVARIANT(
476 len >= 4,
477 "pointer-string should be: backslash, quotes, .., backslash, quotes");
478 INVARIANT(
479 string[0] == '\\',
480 "pointer-string should be: backslash, quotes, .., backslash, quotes");
481 INVARIANT(
482 string[1] == '"',
483 "pointer-string should be: backslash, quotes, .., backslash, quotes");
484 INVARIANT(
485 string[len - 2] == '\\',
486 "pointer-string should be: backslash, quotes, .., backslash, quotes");
487 INVARIANT(
488 string[len - 1] == '"',
489 "pointer-string should be: backslash, quotes, .., backslash, quotes");
490
491 opt_string = string.substr(2, len - 4);
492 }
493
494 return pointer_valuet(result[1], result[2], result[3], opt_string, true);
495}
496
497std::optional<std::string> gdb_apit::get_value(const std::string &expr)
498{
500
501 std::string value;
502 try
503 {
504 value = eval_expr(expr);
505 }
507 {
508 return {};
509 }
510
511 // Get char value
512 {
513 // matches e.g. 99 'c' and extracts c
514 std::regex regex(R"([^ ]+ '([^']+)')");
515
516 std::smatch result;
517 const bool b = regex_match(value, result, regex);
518
519 if(b)
520 {
521 return std::string{result[1]};
522 }
523 }
524
525 // return raw value
526 return value;
527}
528
530gdb_apit::parse_gdb_output_record(const std::string &s)
531{
532 PRECONDITION(s.back() != '\n');
533
534 gdb_output_recordt result;
535
536 std::size_t depth = 0;
537 std::string::size_type start = 0;
538
539 const std::string::size_type n = s.length();
540
541 for(std::string::size_type i = 0; i < n; i++)
542 {
543 const char c = s[i];
544
545 if(c == '{' || c == '[')
546 {
547 depth++;
548 }
549 else if(c == '}' || c == ']')
550 {
551 depth--;
552 }
553
554 if(depth == 0 && (c == ',' || i == n - 1))
555 {
556 const std::string item =
557 i == n - 1 ? s.substr(start) : s.substr(start, i - start);
558
559 // Split on first `=`
560 std::string::size_type j = item.find('=');
561 CHECK_RETURN(j != std::string::npos);
562 CHECK_RETURN(j > 0);
563 CHECK_RETURN(j < s.length());
564
565 const std::string key = strip_string(item.substr(0, j));
566 std::string value = strip_string(item.substr(j + 1));
567
568 const char first = value.front();
569 const char last = value.back();
570
571 INVARIANT(first == '"' || first == '{' || first == '[', "");
572 INVARIANT(first != '"' || last == '"', "");
573 INVARIANT(first != '{' || last == '}', "");
574 INVARIANT(first != '[' || last == ']', "");
575
576 // Remove enclosing `"` for primitive values
577 if(first == '"')
578 {
579 value = value.substr(1, value.length() - 2);
580 }
581
582 auto r = result.insert(std::make_pair(key, value));
583 CHECK_RETURN(r.second);
584
585 start = i + 1;
586 }
587 }
588
589 return result;
590}
591
593{
594 return most_recent_line_has_tag("^done");
595}
596
598{
601}
602
603std::string gdb_apit::r_opt(const std::string &regex)
604{
605 return R"((?:)" + regex + R"()?)";
606}
607
608std::string
609gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
610{
611 return R"((?:)" + regex_left + '|' + regex_right + R"())";
612}
613
615 const gdb_output_recordt &record,
616 const std::string &value_name)
617{
618 const auto it = record.find(value_name);
619 CHECK_RETURN(it != record.end());
620 const auto value = it->second;
621
622 INVARIANT(
623 value.back() != '"' ||
624 (value.length() >= 2 && value[value.length() - 2] == '\\'),
625 "quotes should have been stripped off from value");
626 INVARIANT(value.back() != '\n', "value should not end in a newline");
627
628 return value;
629}
630
631bool gdb_apit::hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
632{
633 const auto it = stopped_record.find("reason");
634 CHECK_RETURN(it != stopped_record.end());
635
636 if(it->second != "breakpoint-hit")
637 return false;
638
640 1;
641}
642
643std::string gdb_apit::get_register_value(const gdb_output_recordt &record)
644{
645 // we expect the record of form:
646 // {[register-values]->[name=name_string, value=\"value_string\"],..}
647 auto record_value = get_value_from_record(record, "register-values");
648 std::string value_eq_quotes = "value=\"";
650
652 auto ending_pos = record_value.find('\"', starting_pos);
654 return std::string{record_value, starting_pos, value_length};
655}
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
static std::string r_opt(const std::string &regex)
bool was_command_accepted()
void check_command_accepted()
bool run_gdb_to_breakpoint(const std::string &breakpoint)
Run gdb to the given breakpoint.
Definition gdb_api.cpp:345
void collect_malloc_calls()
Intercepts the gdb-analysis at the malloc call-site to add the corresponding information into allocat...
Definition gdb_api.cpp:288
FILE * command_stream
Definition gdb_api.h:146
void create_gdb_process()
Create a new gdb process for analysing the binary indicated by the first element in args
Definition gdb_api.cpp:67
std::map< std::string, size_t > allocated_memory
track the allocated size for each malloc call maps hexadecimal address to the number of bytes
Definition gdb_api.h:162
gdb_statet gdb_state
Definition gdb_api.h:158
const std::string r_char
Definition gdb_api.h:218
const std::string r_hex_addr
Definition gdb_api.h:210
void write_to_gdb(const std::string &command)
Definition gdb_api.cpp:169
std::map< std::string, std::string > gdb_output_recordt
Definition gdb_api.h:164
void run_gdb_from_core(const std::string &corefile)
Run gdb with the given core file.
Definition gdb_api.cpp:275
const commandst & get_command_log()
Return the vector of commands that have been written to gdb so far.
Definition gdb_api.cpp:190
std::string eval_expr(const std::string &expr)
Definition gdb_api.cpp:416
const std::string r_id
Definition gdb_api.h:214
std::string read_next_line()
Definition gdb_api.cpp:196
std::optional< std::string > get_value(const std::string &expr)
Get the memory address pointed to by the given pointer expression.
const std::string r_string
Definition gdb_api.h:222
const bool log
Definition gdb_api.h:148
std::forward_list< std::string > commandst
Definition gdb_api.h:32
FILE * response_stream
Definition gdb_api.h:145
std::string get_register_value(const gdb_output_recordt &record)
Parse the record produced by listing register value.
gdb_output_recordt get_most_recent_record(const std::string &tag, const bool must_exist=false)
Definition gdb_api.cpp:250
const std::string malloc_name
Definition gdb_api.h:225
static std::string r_or(const std::string &regex_left, const std::string &regex_right)
std::string get_value_from_record(const gdb_output_recordt &record, const std::string &value_name)
Locate and return the value for a given name.
static gdb_output_recordt parse_gdb_output_record(const std::string &s)
std::vector< std::string > args
Definition gdb_api.h:143
pointer_valuet get_memory(const std::string &expr)
Get the value of a pointer associated with expr.
size_t query_malloc_size(const std::string &pointer_expr)
Get the exact allocated size for a pointer pointer_expr.
Definition gdb_api.cpp:56
gdb_apit(const std::vector< std::string > &args, const bool log=false)
Create a gdb_apit object.
Definition gdb_api.cpp:28
std::string read_most_recent_line()
Definition gdb_api.cpp:235
commandst command_log
Definition gdb_api.h:149
bool most_recent_line_has_tag(const std::string &tag)
Definition gdb_api.cpp:269
~gdb_apit()
Terminate the gdb process and close open streams (for reading from and writing to gdb)
Definition gdb_api.cpp:33
bool hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
Check if the breakpoint we hit is inside a malloc.
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
Low-level interface to gdb.
static int8_t r
Definition irep_hash.h:60
double log(double x)
Definition math.c:2776
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
int dprintf(int fd, const char *restrict format,...)
Definition stdio.c:1669
FILE * fdopen(int handle, const char *mode)
Definition stdio.c:256
int ferror(FILE *stream)
Definition stdio.c:543
char * fgets(char *str, int size, FILE *stream)
Definition stdio.c:321
int fputs(const char *s, FILE *stream)
Definition stdio.c:611
int fclose(FILE *stream)
Definition stdio.c:230
int feof(FILE *stream)
Definition stdio.c:511
int fflush(FILE *stream)
Definition stdio.c:647
void * malloc(__CPROVER_size_t malloc_size)
Definition stdlib.c:212
std::size_t safe_string2size_t(const std::string &str, int base)
char * strerror(int errnum)
Definition string.c:1014
char * strcpy(char *dst, const char *src)
Definition string.c:139
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
Data associated with the value of a pointer, i.e.
Definition gdb_api.h:77
int close(int fildes)
Definition unistd.c:139
int pipe(int fildes[2])
Definition unistd.c:90