cpp_re.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Re2C based C++ lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)
  10. #define BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED
  11. #include <boost/assert.hpp>
  12. #include <boost/wave/wave_config.hpp>
  13. #include <boost/wave/token_ids.hpp>
  14. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  15. // this must occur after all of the includes and before any code appears
  16. #ifdef BOOST_HAS_ABI_HEADERS
  17. #include BOOST_ABI_PREFIX
  18. #endif
  19. // suppress warnings about dependent classes not being exported from the dll
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable : 4251 4231 4660)
  23. #endif
  24. ///////////////////////////////////////////////////////////////////////////////
  // Glue between the Re2C generated scanner code and the local variables
  // (cursor, limit, marker) which scan() declares before including it.
  #define YYCTYPE   uchar
  #define YYCURSOR  cursor
  #define YYLIMIT   limit
  #define YYMARKER  marker

  // Refill request: fill() hands back the (possibly relocated) cursor, the
  // current column is carried over unchanged, and the limit is re-read from
  // the scanner state afterwards. The argument n is not used.
  #define YYFILL(n)                                                           \
      {                                                                       \
          cursor = uchar_wrapper(fill(s, cursor), cursor.column);             \
          limit = uchar_wrapper (s->lim);                                     \
      }                                                                       \
      /**/
  35. #include <iosfwd>
  36. ///////////////////////////////////////////////////////////////////////////////
  // Write the scanning position held in the local variables cursor, limit
  // and marker back into the scanner state s. The line counter is advanced
  // by the number of erased backslash-newline sequences which lie at or in
  // front of the cursor (see count_backslash_newlines).
  #define BOOST_WAVE_UPDATE_CURSOR()                                          \
      {                                                                       \
          s->line += count_backslash_newlines(s, cursor);                     \
          s->curr_column = cursor.column;                                     \
          s->cur = cursor;                                                    \
          s->lim = limit;                                                     \
          s->ptr = marker;                                                    \
      }                                                                       \
      /**/
  46. ///////////////////////////////////////////////////////////////////////////////
  // Leave the scanner function with token id i: the scanner state is
  // synchronised first, and T_EOF is returned instead of i whenever the
  // current position ended up behind the limit of the buffer.
  #define BOOST_WAVE_RET(i)                                                   \
      {                                                                       \
          BOOST_WAVE_UPDATE_CURSOR()                                          \
          if (s->cur > s->lim)                                                \
              return T_EOF;     /* may happen for empty files */              \
          return (i);                                                         \
      }                                                                       \
      /**/
  55. ///////////////////////////////////////////////////////////////////////////////
  56. namespace boost {
  57. namespace wave {
  58. namespace cpplexer {
  59. namespace re2clex {
  60. template<typename Iterator>
  61. struct Scanner;
  62. ///////////////////////////////////////////////////////////////////////////////
  63. // The scanner function to call whenever a new token is requested
  64. template<typename Iterator>
  65. BOOST_WAVE_DECL boost::wave::token_id scan(Scanner<Iterator> *s);
  66. ///////////////////////////////////////////////////////////////////////////////
  67. ///////////////////////////////////////////////////////////////////////////////
  68. // Utility functions
  69. #define RE2C_ASSERT BOOST_ASSERT
  70. template<typename Iterator>
  71. int get_one_char(Scanner<Iterator> *s)
  72. {
  73. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  74. if (s->act < s->last)
  75. return *(s->act)++;
  76. return -1;
  77. }
  78. template<typename Iterator>
  79. std::ptrdiff_t rewind_stream (Scanner<Iterator> *s, int cnt)
  80. {
  81. std::advance(s->act, cnt);
  82. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  83. return std::distance(s->first, s->act);
  84. }
  85. template<typename Iterator>
  86. std::size_t get_first_eol_offset(Scanner<Iterator>* s)
  87. {
  88. if (!AQ_EMPTY(s->eol_offsets))
  89. {
  90. return s->eol_offsets->queue[s->eol_offsets->head];
  91. }
  92. else
  93. {
  94. return (unsigned int)-1;
  95. }
  96. }
  97. template<typename Iterator>
  98. void adjust_eol_offsets(Scanner<Iterator>* s, std::size_t adjustment)
  99. {
  100. aq_queue q;
  101. std::size_t i;
  102. if (!s->eol_offsets)
  103. s->eol_offsets = aq_create();
  104. q = s->eol_offsets;
  105. if (AQ_EMPTY(q))
  106. return;
  107. i = q->head;
  108. while (i != q->tail)
  109. {
  110. if (adjustment > q->queue[i])
  111. q->queue[i] = 0;
  112. else
  113. q->queue[i] -= adjustment;
  114. ++i;
  115. if (i == q->max_size)
  116. i = 0;
  117. }
  118. if (adjustment > q->queue[i])
  119. q->queue[i] = 0;
  120. else
  121. q->queue[i] -= adjustment;
  122. }
  123. template<typename Iterator>
  124. int count_backslash_newlines(Scanner<Iterator> *s, uchar *cursor)
  125. {
  126. std::size_t diff, offset;
  127. int skipped = 0;
  128. /* figure out how many backslash-newlines skipped over unknowingly. */
  129. diff = cursor - s->bot;
  130. offset = get_first_eol_offset(s);
  131. while (offset <= diff && offset != (unsigned int)-1)
  132. {
  133. skipped++;
  134. aq_pop(s->eol_offsets);
  135. offset = get_first_eol_offset(s);
  136. }
  137. return skipped;
  138. }
  139. BOOST_WAVE_DECL bool is_backslash(uchar *p, uchar *end, int &len);
  140. #define BOOST_WAVE_BSIZE 196608
  141. template<typename Iterator>
  142. uchar *fill(Scanner<Iterator> *s, uchar *cursor)
  143. {
  144. using namespace std; // some systems have memcpy etc. in namespace std
  145. if(!s->eof)
  146. {
  147. uchar* p;
  148. std::ptrdiff_t cnt = s->tok - s->bot;
  149. if(cnt)
  150. {
  151. if (NULL == s->lim)
  152. s->lim = s->top;
  153. size_t length = s->lim - s->tok;
  154. if(length > 0){
  155. memmove(s->bot, s->tok, length);
  156. }
  157. s->tok = s->cur = s->bot;
  158. s->ptr -= cnt;
  159. cursor -= cnt;
  160. s->lim -= cnt;
  161. adjust_eol_offsets(s, cnt);
  162. }
  163. if((s->top - s->lim) < BOOST_WAVE_BSIZE)
  164. {
  165. uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
  166. if (buf == 0)
  167. {
  168. (*s->error_proc)(s, lexing_exception::unexpected_error,
  169. "Out of memory!");
  170. /* get the scanner to stop */
  171. *cursor = 0;
  172. return cursor;
  173. }
  174. size_t length = s->lim - s->tok;
  175. if(length > 0){
  176. memmove(buf, s->tok, length);
  177. }
  178. s->tok = s->cur = buf;
  179. s->ptr = &buf[s->ptr - s->bot];
  180. cursor = &buf[cursor - s->bot];
  181. s->lim = &buf[s->lim - s->bot];
  182. s->top = &s->lim[BOOST_WAVE_BSIZE];
  183. free(s->bot);
  184. s->bot = buf;
  185. }
  186. cnt = std::distance(s->act, s->last);
  187. if (cnt > BOOST_WAVE_BSIZE)
  188. cnt = BOOST_WAVE_BSIZE;
  189. uchar * dst = s->lim;
  190. for (std::ptrdiff_t idx = 0; idx < cnt; ++idx)
  191. {
  192. *dst++ = *s->act++;
  193. }
  194. if (cnt != BOOST_WAVE_BSIZE)
  195. {
  196. s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
  197. }
  198. /* backslash-newline erasing time */
  199. /* first scan for backslash-newline and erase them */
  200. for (p = s->lim; p < s->lim + cnt - 2; ++p)
  201. {
  202. int len = 0;
  203. if (is_backslash(p, s->lim + cnt, len))
  204. {
  205. if (*(p+len) == '\n')
  206. {
  207. int offset = len + 1;
  208. memmove(p, p + offset, s->lim + cnt - p - offset);
  209. cnt -= offset;
  210. --p;
  211. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  212. }
  213. else if (*(p+len) == '\r')
  214. {
  215. if (*(p+len+1) == '\n')
  216. {
  217. int offset = len + 2;
  218. memmove(p, p + offset, s->lim + cnt - p - offset);
  219. cnt -= offset;
  220. --p;
  221. }
  222. else
  223. {
  224. int offset = len + 1;
  225. memmove(p, p + offset, s->lim + cnt - p - offset);
  226. cnt -= offset;
  227. --p;
  228. }
  229. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  230. }
  231. }
  232. }
  233. /* FIXME: the following code should be fixed to recognize correctly the
  234. trigraph backslash token */
  235. /* check to see if what we just read ends in a backslash */
  236. if (cnt >= 2)
  237. {
  238. uchar last = s->lim[cnt-1];
  239. uchar last2 = s->lim[cnt-2];
  240. /* check \ EOB */
  241. if (last == '\\')
  242. {
  243. int next = get_one_char(s);
  244. /* check for \ \n or \ \r or \ \r \n straddling the border */
  245. if (next == '\n')
  246. {
  247. --cnt; /* chop the final \, we've already read the \n. */
  248. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  249. }
  250. else if (next == '\r')
  251. {
  252. int next2 = get_one_char(s);
  253. if (next2 == '\n')
  254. {
  255. --cnt; /* skip the backslash */
  256. }
  257. else
  258. {
  259. /* rewind one, and skip one char */
  260. rewind_stream(s, -1);
  261. --cnt;
  262. }
  263. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  264. }
  265. else if (next != -1) /* -1 means end of file */
  266. {
  267. /* next was something else, so rewind the stream */
  268. rewind_stream(s, -1);
  269. }
  270. }
  271. /* check \ \r EOB */
  272. else if (last == '\r' && last2 == '\\')
  273. {
  274. int next = get_one_char(s);
  275. if (next == '\n')
  276. {
  277. cnt -= 2; /* skip the \ \r */
  278. }
  279. else
  280. {
  281. /* rewind one, and skip two chars */
  282. rewind_stream(s, -1);
  283. cnt -= 2;
  284. }
  285. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  286. }
  287. /* check \ \n EOB */
  288. else if (last == '\n' && last2 == '\\')
  289. {
  290. cnt -= 2;
  291. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  292. }
  293. }
  294. s->lim += cnt;
  295. if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
  296. {
  297. s->eof = s->lim;
  298. *(s->eof)++ = '\0';
  299. }
  300. }
  301. return cursor;
  302. }
  303. #undef BOOST_WAVE_BSIZE
  304. ///////////////////////////////////////////////////////////////////////////////
  305. // Special wrapper class holding the current cursor position
  306. struct BOOST_WAVE_DECL uchar_wrapper
  307. {
  308. uchar_wrapper (uchar *base_cursor, std::size_t column = 1);
  309. uchar_wrapper& operator++();
  310. uchar_wrapper& operator--();
  311. uchar operator* () const;
  312. operator uchar *() const;
  313. friend BOOST_WAVE_DECL std::ptrdiff_t
  314. operator- (uchar_wrapper const& lhs, uchar_wrapper const& rhs);
  315. uchar *base_cursor;
  316. std::size_t column;
  317. };
  318. ///////////////////////////////////////////////////////////////////////////////
  319. template<typename Iterator>
  320. boost::wave::token_id scan(Scanner<Iterator> *s)
  321. {
  322. BOOST_ASSERT(0 != s->error_proc); // error handler must be given
  323. uchar_wrapper cursor (s->tok = s->cur, s->column = s->curr_column);
  324. uchar_wrapper marker (s->ptr);
  325. uchar_wrapper limit (s->lim);
  326. typedef BOOST_WAVE_STRINGTYPE string_type;
  327. string_type rawstringdelim; // for use with C++11 raw string literals
  328. // include the correct Re2C token definition rules
  329. #if (defined (__FreeBSD__) || defined (__DragonFly__) || defined (__OpenBSD__)) && defined (T_DIVIDE)
  330. #undef T_DIVIDE
  331. #endif
  332. #if BOOST_WAVE_USE_STRICT_LEXER != 0
  333. #include "strict_cpp_re.inc"
  334. #else
  335. #include "cpp_re.inc"
  336. #endif
  337. } /* end of scan */
  338. ///////////////////////////////////////////////////////////////////////////////
  339. } // namespace re2clex
  340. } // namespace cpplexer
  341. } // namespace wave
  342. } // namespace boost
  343. #ifdef BOOST_MSVC
  344. #pragma warning(pop)
  345. #endif
  // The helper macros defined at the top of this header exist only for the
  // scanner code included into scan(); remove them again so that they do
  // not leak into the code including this header. BOOST_WAVE_UPDATE_CURSOR
  // is removed together with BOOST_WAVE_RET, which is its only user here.
  #undef BOOST_WAVE_RET
  #undef BOOST_WAVE_UPDATE_CURSOR
  #undef YYCTYPE
  #undef YYCURSOR
  #undef YYLIMIT
  #undef YYMARKER
  #undef YYFILL
  352. // the suffix header occurs after all of the code
  353. #ifdef BOOST_HAS_ABI_HEADERS
  354. #include BOOST_ABI_SUFFIX
  355. #endif
  356. #endif // !defined(BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)