CBMC
gdb_api.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: GDB Machine Interface API
4 
5 Author: Malte Mues <mail.mues@gmail.com>
6  Daniel Poetzl
7 
8 \*******************************************************************/
9 
14 
15 #include <cerrno>
16 #include <cstdio>
17 #include <cstring>
18 #include <regex>
19 
20 #include "gdb_api.h"
21 
22 #include <util/prefix.h>
23 #include <util/string2int.h>
24 #include <util/string_utils.h>
25 
26 #include <sys/wait.h>
27 
28 gdb_apit::gdb_apit(const std::vector<std::string> &args, const bool log)
29  : args(args), log(log), gdb_state(gdb_statet::NOT_CREATED)
30 {
31 }
32 
34 {
38 
40  return;
41 
42  write_to_gdb("-gdb-exit");
43  // we cannot use most_recent_line_has_tag() here as it checks the last line
44  // before the next `(gdb) \n` prompt in the output; however when gdb exits no
45  // next prompt is printed
47 
49 
52 
53  wait(NULL);
54 }
55 
56 size_t gdb_apit::query_malloc_size(const std::string &pointer_expr)
57 {
58  const auto maybe_address_string = get_value(pointer_expr);
59  CHECK_RETURN(maybe_address_string.has_value());
60 
61  if(allocated_memory.count(*maybe_address_string) == 0)
62  return 1;
63  else
64  return allocated_memory[*maybe_address_string];
65 }
66 
68 {
70 
71  command_log.clear();
72 
73  pid_t gdb_process;
74 
75  int pipe_input[2];
76  int pipe_output[2];
77 
78  if(pipe(pipe_input) == -1)
79  {
80  throw gdb_interaction_exceptiont("could not create pipe for stdin");
81  }
82 
83  if(pipe(pipe_output) == -1)
84  {
85  throw gdb_interaction_exceptiont("could not create pipe for stdout");
86  }
87 
88  gdb_process = fork();
89 
90  if(gdb_process == -1)
91  {
92  throw gdb_interaction_exceptiont("could not create gdb process");
93  }
94 
95  if(gdb_process == 0)
96  {
97  // child process
98  close(pipe_input[1]);
99  close(pipe_output[0]);
100 
101  dup2(pipe_input[0], STDIN_FILENO);
102  dup2(pipe_output[1], STDOUT_FILENO);
103  dup2(pipe_output[1], STDERR_FILENO);
104 
105  dprintf(pipe_output[1], "binary name: %s\n", args.front().c_str());
106 
107  std::vector<std::string> exec_cmd;
108  exec_cmd.reserve(args.size() + 3);
109  exec_cmd.push_back("gdb");
110  exec_cmd.push_back("--interpreter=mi");
111  exec_cmd.push_back("--args");
112  exec_cmd.insert(exec_cmd.end(), args.begin(), args.end());
113 
114  char **exec_cmd_ptr = static_cast<char **>(malloc(
115  sizeof(char *) * (exec_cmd.size() + 1)));
116  exec_cmd_ptr[exec_cmd.size()] = NULL;
117 
118  for(std::size_t i = 0; i < exec_cmd.size(); i++)
119  {
120  exec_cmd_ptr[i] = static_cast<char *>(malloc(
121  sizeof(char) * (exec_cmd[i].length() + 1)));
122  strcpy(exec_cmd_ptr[i], exec_cmd[i].c_str()); // NOLINT(runtime/printf)
123  }
124 
125  dprintf(pipe_output[1], "Loading gdb...\n");
126  execvp("gdb", exec_cmd_ptr);
127 
128  // Only reachable, if execvp failed
129  int errno_value = errno;
130  dprintf(pipe_output[1], "Starting gdb failed: %s\n", strerror(errno_value));
131  dprintf(pipe_output[1], "(gdb) \n");
132  throw gdb_interaction_exceptiont("could not run gdb");
133  }
134  else
135  {
136  // parent process
137  close(pipe_input[0]);
138  close(pipe_output[1]);
139 
140  // get stream for reading the gdb output
141  response_stream = fdopen(pipe_output[0], "r");
142 
143  // get stream for writing to gdb
144  command_stream = fdopen(pipe_input[1], "w");
145 
146  std::string line = read_most_recent_line();
147  if(has_prefix(line, "Starting gdb failed:"))
148  throw gdb_interaction_exceptiont(line);
149 
151 
152  CHECK_RETURN(
153  has_prefix(line, R"(~"done)") ||
154  has_prefix(line, R"(~"Reading)"));
155 
156  if(log)
157  {
158  // logs output to `gdb.txt` in the current directory, input is not logged
159  // hence we log it to `command_log`
160  write_to_gdb("-gdb-set logging on");
162  }
163 
164  write_to_gdb("-gdb-set max-value-size unlimited");
166  }
167 }
168 
169 void gdb_apit::write_to_gdb(const std::string &command)
170 {
171  PRECONDITION(!command.empty());
172  PRECONDITION(command.find('\n') == std::string::npos);
173 
174  std::string line(command);
175  line += '\n';
176 
177  if(log)
178  {
179  command_log.push_front(command);
180  }
181 
182  if(fputs(line.c_str(), command_stream) == EOF)
183  {
184  throw gdb_interaction_exceptiont("could not write a command to gdb");
185  }
186 
188 }
189 
191 {
192  PRECONDITION(log);
193  return command_log;
194 }
195 
197 {
198  std::string result;
199 
200  do
201  {
202  const size_t buf_size = 1024;
203  char buf[buf_size]; // NOLINT(runtime/arrays)
204 
205  const char *c = fgets(buf, buf_size, response_stream);
206 
207  if(c == NULL)
208  {
210  {
211  throw gdb_interaction_exceptiont("error reading from gdb");
212  }
213 
214  INVARIANT(
216  "EOF must have been reached when the error indicator on the stream "
217  "is not set and fgets returned NULL");
218  INVARIANT(
219  result.empty() || result.back() != '\n',
220  "when EOF is reached then either no characters were read or the string"
221  " read does not end in a newline");
222 
223  return result;
224  }
225 
226  std::string chunk(buf);
227  INVARIANT(!chunk.empty(), "chunk cannot be empty when EOF was not reached");
228 
229  result += chunk;
230  } while(result.back() != '\n');
231 
232  return result;
233 }
234 
236 {
237  std::string line;
238  std::string output;
239 
240  do
241  {
242  output = line;
243  line = read_next_line();
244  } while(line != "(gdb) \n");
245 
246  return output;
247 }
248 
250 gdb_apit::get_most_recent_record(const std::string &tag, const bool must_exist)
251 {
252  std::string line = read_most_recent_line();
253  const bool b = has_prefix(line, tag);
254 
255  if(must_exist)
256  {
257  CHECK_RETURN(b);
258  }
259  else if(!b)
260  {
261  throw gdb_interaction_exceptiont("record does not exist");
262  }
263 
264  std::string record = strip_string(line.substr(line.find(',') + 1));
265 
266  return parse_gdb_output_record(record);
267 }
268 
269 bool gdb_apit::most_recent_line_has_tag(const std::string &tag)
270 {
271  const std::string line = read_most_recent_line();
272  return has_prefix(line, tag);
273 }
274 
275 void gdb_apit::run_gdb_from_core(const std::string &corefile)
276 {
278 
279  // there does not seem to be a gdb mi command to run from a core file
280  const std::string command = "core " + corefile;
281 
282  write_to_gdb(command);
284 
286 }
287 
289 {
290 #if defined(__x86_64__)
291  // this is what the registers look like at the function call entry for x86-64:
292  //
293  // reg. name hex. value dec. value
294  // 0: rax 0xffffffff 4294967295
295  // 1: rbx 0x20000000 536870912
296  // 2: rcx 0x591 1425
297  // 3: rdx 0x591 1425
298  // 4: rsi 0x1 1
299  // 5: rdi 0x591 1425
300  // ...
301  // rax will eventually contain the return value and
302  // rdi now stores the first (integer) argument
303  // in the machine interface they are referred to by numbers, hence:
304  write_to_gdb("-data-list-register-values d 5");
305  auto record = get_most_recent_record("^done", true);
306  auto allocated_size = safe_string2size_t(get_register_value(record));
307 #elif defined(__i386__)
308  // x86 32-bit Linux calling conventions use the stack to pass arguments. The
309  // top of the stack is the return address, so look at the next element (+4 as
310  // the stack grows downwards).
311  write_to_gdb("-data-evaluate-expression \"*(unsigned long*)($esp + 4)\"");
312  auto record = get_most_recent_record("^done", true);
313  auto allocated_size =
314  safe_string2size_t(get_value_from_record(record, "value"));
315 #else
316 # error malloc calling conventions not know for current platform
317 #endif
318 
319  write_to_gdb("-exec-finish");
320  if(!most_recent_line_has_tag("*running"))
321  {
322  throw gdb_interaction_exceptiont("could not run program");
323  }
324  record = get_most_recent_record("*stopped");
325  auto frame_content = get_value_from_record(record, "frame");
326 
327  // the malloc breakpoint may be inside another malloc function
328  if(frame_content.find("func=\"malloc\"") != std::string::npos)
329  {
330  // so we need to finish the outer malloc as well
331  write_to_gdb("-exec-finish");
332  if(!most_recent_line_has_tag("*running"))
333  {
334  throw gdb_interaction_exceptiont("could not run program");
335  }
336  record = get_most_recent_record("*stopped");
337  }
338 
339  // now we can read the eax/rax register to the allocated memory address
340  write_to_gdb("-data-list-register-values x 0");
341  record = get_most_recent_record("^done", true);
342  allocated_memory[get_register_value(record)] = allocated_size;
343 }
344 
345 bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint)
346 {
348 
349  write_to_gdb("-break-insert " + malloc_name);
350  bool malloc_is_known = was_command_accepted();
351 
352  std::string command("-break-insert");
353  command += " " + breakpoint;
354 
355  write_to_gdb(command);
356  if(!was_command_accepted())
357  {
358  throw gdb_interaction_exceptiont("could not set breakpoint");
359  }
360 
361  write_to_gdb("-exec-run");
362 
363  if(!most_recent_line_has_tag("*running"))
364  {
365  throw gdb_interaction_exceptiont("could not run program");
366  }
367 
368  gdb_output_recordt record = get_most_recent_record("*stopped");
369 
370  // malloc function is known, i.e. present among the symbols
371  if(malloc_is_known)
372  {
373  // stop at every entry into malloc call
374  while(hit_malloc_breakpoint(record))
375  {
376  // and store the information about the allocated memory
378  write_to_gdb("-exec-continue");
379  if(!most_recent_line_has_tag("*running"))
380  {
381  throw gdb_interaction_exceptiont("could not run program");
382  }
383  record = get_most_recent_record("*stopped");
384  }
385 
386  write_to_gdb("-break-delete 1");
387  if(!was_command_accepted())
388  {
389  throw gdb_interaction_exceptiont("could not delete breakpoint at malloc");
390  }
391  }
392 
393  const auto it = record.find("reason");
394  CHECK_RETURN(it != record.end());
395 
396  const std::string &reason = it->second;
397 
398  if(reason == "breakpoint-hit")
399  {
401  return true;
402  }
403  else if(reason == "exited-normally")
404  {
405  return false;
406  }
407  else
408  {
410  "gdb stopped for unhandled reason `" + reason + "`");
411  }
412 
413  UNREACHABLE;
414 }
415 
416 std::string gdb_apit::eval_expr(const std::string &expr)
417 {
418  write_to_gdb("-var-create tmp * " + expr);
419 
420  if(!was_command_accepted())
421  {
423  "could not create variable for expression `" + expr + "`");
424  }
425 
426  write_to_gdb("-var-evaluate-expression tmp");
427  gdb_output_recordt record = get_most_recent_record("^done", true);
428 
429  write_to_gdb("-var-delete tmp");
431 
432  const auto it = record.find("value");
433  CHECK_RETURN(it != record.end());
434 
435  const std::string value = it->second;
436 
437  INVARIANT(
438  value.back() != '"' ||
439  (value.length() >= 2 && value[value.length() - 2] == '\\'),
440  "quotes should have been stripped off from value");
441  INVARIANT(value.back() != '\n', "value should not end in a newline");
442 
443  return value;
444 }
445 
446 gdb_apit::pointer_valuet gdb_apit::get_memory(const std::string &expr)
447 {
449 
450  std::string value;
451  try
452  {
453  value = eval_expr(expr);
454  }
456  {
457  return pointer_valuet{};
458  }
459 
460  std::regex regex(
461  r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
462 
463  std::smatch result;
464  const bool b = regex_match(value, result, regex);
465  if(!b)
466  return pointer_valuet{};
467 
468  std::optional<std::string> opt_string;
469  const std::string string = result[4];
470 
471  if(!string.empty())
472  {
473  const std::size_t len = string.length();
474 
475  INVARIANT(
476  len >= 4,
477  "pointer-string should be: backslash, quotes, .., backslash, quotes");
478  INVARIANT(
479  string[0] == '\\',
480  "pointer-string should be: backslash, quotes, .., backslash, quotes");
481  INVARIANT(
482  string[1] == '"',
483  "pointer-string should be: backslash, quotes, .., backslash, quotes");
484  INVARIANT(
485  string[len - 2] == '\\',
486  "pointer-string should be: backslash, quotes, .., backslash, quotes");
487  INVARIANT(
488  string[len - 1] == '"',
489  "pointer-string should be: backslash, quotes, .., backslash, quotes");
490 
491  opt_string = string.substr(2, len - 4);
492  }
493 
494  return pointer_valuet(result[1], result[2], result[3], opt_string, true);
495 }
496 
497 std::optional<std::string> gdb_apit::get_value(const std::string &expr)
498 {
500 
501  std::string value;
502  try
503  {
504  value = eval_expr(expr);
505  }
507  {
508  return {};
509  }
510 
511  // Get char value
512  {
513  // matches e.g. 99 'c' and extracts c
514  std::regex regex(R"([^ ]+ '([^']+)')");
515 
516  std::smatch result;
517  const bool b = regex_match(value, result, regex);
518 
519  if(b)
520  {
521  return std::string{result[1]};
522  }
523  }
524 
525  // return raw value
526  return value;
527 }
528 
530 gdb_apit::parse_gdb_output_record(const std::string &s)
531 {
532  PRECONDITION(s.back() != '\n');
533 
534  gdb_output_recordt result;
535 
536  std::size_t depth = 0;
537  std::string::size_type start = 0;
538 
539  const std::string::size_type n = s.length();
540 
541  for(std::string::size_type i = 0; i < n; i++)
542  {
543  const char c = s[i];
544 
545  if(c == '{' || c == '[')
546  {
547  depth++;
548  }
549  else if(c == '}' || c == ']')
550  {
551  depth--;
552  }
553 
554  if(depth == 0 && (c == ',' || i == n - 1))
555  {
556  const std::string item =
557  i == n - 1 ? s.substr(start) : s.substr(start, i - start);
558 
559  // Split on first `=`
560  std::string::size_type j = item.find('=');
561  CHECK_RETURN(j != std::string::npos);
562  CHECK_RETURN(j > 0);
563  CHECK_RETURN(j < s.length());
564 
565  const std::string key = strip_string(item.substr(0, j));
566  std::string value = strip_string(item.substr(j + 1));
567 
568  const char first = value.front();
569  const char last = value.back();
570 
571  INVARIANT(first == '"' || first == '{' || first == '[', "");
572  INVARIANT(first != '"' || last == '"', "");
573  INVARIANT(first != '{' || last == '}', "");
574  INVARIANT(first != '[' || last == ']', "");
575 
576  // Remove enclosing `"` for primitive values
577  if(first == '"')
578  {
579  value = value.substr(1, value.length() - 2);
580  }
581 
582  auto r = result.insert(std::make_pair(key, value));
583  CHECK_RETURN(r.second);
584 
585  start = i + 1;
586  }
587  }
588 
589  return result;
590 }
591 
593 {
594  return most_recent_line_has_tag("^done");
595 }
596 
598 {
599  bool was_accepted = was_command_accepted();
600  CHECK_RETURN(was_accepted);
601 }
602 
603 std::string gdb_apit::r_opt(const std::string &regex)
604 {
605  return R"((?:)" + regex + R"()?)";
606 }
607 
608 std::string
609 gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
610 {
611  return R"((?:)" + regex_left + '|' + regex_right + R"())";
612 }
613 
615  const gdb_output_recordt &record,
616  const std::string &value_name)
617 {
618  const auto it = record.find(value_name);
619  CHECK_RETURN(it != record.end());
620  const auto value = it->second;
621 
622  INVARIANT(
623  value.back() != '"' ||
624  (value.length() >= 2 && value[value.length() - 2] == '\\'),
625  "quotes should have been stripped off from value");
626  INVARIANT(value.back() != '\n', "value should not end in a newline");
627 
628  return value;
629 }
630 
631 bool gdb_apit::hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
632 {
633  const auto it = stopped_record.find("reason");
634  CHECK_RETURN(it != stopped_record.end());
635 
636  if(it->second != "breakpoint-hit")
637  return false;
638 
639  return safe_string2size_t(get_value_from_record(stopped_record, "bkptno")) ==
640  1;
641 }
642 
643 std::string gdb_apit::get_register_value(const gdb_output_recordt &record)
644 {
645  // we expect the record of form:
646  // {[register-values]->[name=name_string, value=\"value_string\"],..}
647  auto record_value = get_value_from_record(record, "register-values");
648  std::string value_eq_quotes = "value=\"";
649  auto value_eq_quotes_size = value_eq_quotes.size();
650 
651  auto starting_pos = record_value.find(value_eq_quotes) + value_eq_quotes_size;
652  auto ending_pos = record_value.find('\"', starting_pos);
653  auto value_length = ending_pos - starting_pos;
654  return std::string{record_value, starting_pos, value_length};
655 }
static std::string r_opt(const std::string &regex)
bool was_command_accepted()
void check_command_accepted()
bool run_gdb_to_breakpoint(const std::string &breakpoint)
Run gdb to the given breakpoint.
Definition: gdb_api.cpp:345
void collect_malloc_calls()
Intercepts the gdb analysis at the malloc call site to add the corresponding information (address and requested size) to allocated_memory.
Definition: gdb_api.cpp:288
FILE * command_stream
Definition: gdb_api.h:146
void create_gdb_process()
Create a new gdb process for analysing the binary indicated by the first element in args
Definition: gdb_api.cpp:67
std::map< std::string, size_t > allocated_memory
Tracks the allocated size for each malloc call; maps the hexadecimal address to the number of bytes.
Definition: gdb_api.h:162
gdb_statet gdb_state
Definition: gdb_api.h:158
const std::string r_char
Definition: gdb_api.h:218
const std::string r_hex_addr
Definition: gdb_api.h:210
void write_to_gdb(const std::string &command)
Definition: gdb_api.cpp:169
std::map< std::string, std::string > gdb_output_recordt
Definition: gdb_api.h:164
void run_gdb_from_core(const std::string &corefile)
Run gdb with the given core file.
Definition: gdb_api.cpp:275
const commandst & get_command_log()
Return the vector of commands that have been written to gdb so far.
Definition: gdb_api.cpp:190
std::optional< std::string > get_value(const std::string &expr)
Get the value of the given expression as a string; for a character value such as `99 'c'` only the character itself is returned.
gdb_statet
Definition: gdb_api.h:152
std::string eval_expr(const std::string &expr)
Definition: gdb_api.cpp:416
const std::string r_id
Definition: gdb_api.h:214
std::string read_next_line()
Definition: gdb_api.cpp:196
const std::string r_string
Definition: gdb_api.h:222
const bool log
Definition: gdb_api.h:148
std::forward_list< std::string > commandst
Definition: gdb_api.h:32
FILE * response_stream
Definition: gdb_api.h:145
std::string get_register_value(const gdb_output_recordt &record)
Parse the record produced by listing register value.
gdb_output_recordt get_most_recent_record(const std::string &tag, const bool must_exist=false)
Definition: gdb_api.cpp:250
const std::string malloc_name
Definition: gdb_api.h:225
static std::string r_or(const std::string &regex_left, const std::string &regex_right)
std::string get_value_from_record(const gdb_output_recordt &record, const std::string &value_name)
Locate and return the value for a given name.
static gdb_output_recordt parse_gdb_output_record(const std::string &s)
std::vector< std::string > args
Definition: gdb_api.h:143
pointer_valuet get_memory(const std::string &expr)
Get the value of a pointer associated with expr.
size_t query_malloc_size(const std::string &pointer_expr)
Get the exact allocated size for a pointer pointer_expr.
Definition: gdb_api.cpp:56
gdb_apit(const std::vector< std::string > &args, const bool log=false)
Create a gdb_apit object.
Definition: gdb_api.cpp:28
std::string read_most_recent_line()
Definition: gdb_api.cpp:235
commandst command_log
Definition: gdb_api.h:149
bool most_recent_line_has_tag(const std::string &tag)
Definition: gdb_api.cpp:269
~gdb_apit()
Terminate the gdb process and close open streams (for reading from and writing to gdb)
Definition: gdb_api.cpp:33
bool hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
Check if the breakpoint we hit is inside a malloc.
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
Low-level interface to gdb.
static int8_t r
Definition: irep_hash.h:60
double log(double x)
Definition: math.c:2776
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
int dprintf(int fd, const char *restrict format,...)
Definition: stdio.c:1615
FILE * fdopen(int handle, const char *mode)
Definition: stdio.c:256
int ferror(FILE *stream)
Definition: stdio.c:543
int fputs(const char *s, FILE *stream)
Definition: stdio.c:611
int fclose(FILE *stream)
Definition: stdio.c:230
char * fgets(char *str, int size, FILE *stream)
Definition: stdio.c:321
int feof(FILE *stream)
Definition: stdio.c:511
int fflush(FILE *stream)
Definition: stdio.c:647
void * malloc(__CPROVER_size_t malloc_size)
Definition: stdlib.c:212
std::size_t safe_string2size_t(const std::string &str, int base)
Definition: string2int.cpp:23
char * strcpy(char *dst, const char *src)
Definition: string.c:139
char * strerror(int errnum)
Definition: string.c:1014
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
Data associated with the value of a pointer, i.e.
Definition: gdb_api.h:77
int close(int fildes)
Definition: unistd.c:139
#define size_type
Definition: unistd.c:347
int pipe(int fildes[2])
Definition: unistd.c:90