1 files changed, 84 insertions, 67 deletions
diff --git a/lib/libexpat/tests/basic_tests.c b/lib/libexpat/tests/basic_tests.c
index 7112a440187..91c8dd7a392 100644
--- a/lib/libexpat/tests/basic_tests.c
+++ b/lib/libexpat/tests/basic_tests.c
@@ -1202,6 +1202,49 @@ START_TEST(test_wfc_no_recursive_entity_refs) {
 }
 END_TEST
 
+START_TEST(test_recursive_external_parameter_entity_2) {
+  struct TestCase {
+    const char *doc;                // entity declaration text to parse
+    enum XML_Status expectedStatus; // status the parse call should return
+  };
+  // Declarations of p1 that refer back to p1, with the expected outcome.
+  struct TestCase cases[] = {
+      {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, // p1 refers to itself
+      {"<!ENTITY % p1 '%p1;'>"
+       "<!ENTITY % p1 'first declaration wins'>",
+       XML_STATUS_ERROR}, // self-referencing declaration comes first
+      {"<!ENTITY % p1 'first declaration wins'>"
+       "<!ENTITY % p1 '%p1;'>",
+       XML_STATUS_OK}, // self-referencing declaration comes second
+      {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK}, // '%' written as &#37;
+  };
+  // Each case is a separate subtest using a fresh pair of parsers.
+  for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    const char *const doc = cases[i].doc;
+    const enum XML_Status expectedStatus = cases[i].expectedStatus;
+    set_subtest("%s", doc);
+
+    XML_Parser parser = XML_ParserCreate(NULL);
+    assert_true(parser != NULL);
+    // The text is parsed by an external entity parser derived from parser.
+    XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
+    assert_true(ext_parser != NULL);
+    // Pass the whole text at once; XML_TRUE is presumably the is-final flag.
+    const enum XML_Status actualStatus
+        = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
+
+    assert_true(actualStatus == expectedStatus);
+    if (actualStatus != XML_STATUS_OK) { // the only accepted failure code:
+      assert_true(XML_GetErrorCode(ext_parser)
+                  == XML_ERROR_RECURSIVE_ENTITY_REF);
+    }
+    // Free the external entity parser first, then its parent.
+    XML_ParserFree(ext_parser);
+    XML_ParserFree(parser);
+  }
+}
+END_TEST
+
 /* Test incomplete external entities are faulted */
 START_TEST(test_ext_entity_invalid_parse) {
   const char *text = "<!DOCTYPE doc [\n"
@@ -5202,13 +5245,7 @@ START_TEST(test_nested_entity_suspend) {
 END_TEST
 
 /* Regression test for quadratic parsing on large tokens */
-START_TEST(test_big_tokens_take_linear_time) {
-  const char *const too_slow_failure_message
-      = "Compared to the baseline runtime of the first test, this test has a "
-        "slowdown of more than <max_slowdown>. "
-        "Please keep increasing the value by 1 until it reliably passes the "
-        "test on your hardware and open a bug sharing that number with us. "
-        "Thanks in advance!";
+START_TEST(test_big_tokens_scale_linearly) {
   const struct {
     const char *pre;
     const char *post;
@@ -5220,65 +5257,57 @@ START_TEST(test_big_tokens_take_linear_time) {
       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
   };
   const int num_cases = sizeof(text) / sizeof(text[0]);
-  // For the test we need a <max_slowdown> value that is:
-  // (1) big enough that the test passes reliably (avoiding flaky tests), and
-  // (2) small enough that the test actually catches regressions.
-  const int max_slowdown = 15;
   char aaaaaa[4096];
   const int fillsize = (int)sizeof(aaaaaa);
   const int fillcount = 100;
+  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
+  const unsigned max_factor = 4;
+  const unsigned max_scanned = max_factor * approx_bytes;
 
   memset(aaaaaa, 'a', fillsize);
 
   if (! g_reparseDeferralEnabledDefault) {
     return; // heuristic is disabled; we would get O(n^2) and fail.
   }
-#if ! defined(__linux__)
-  if (CLOCKS_PER_SEC < 100000) {
-    // Skip this test if clock() doesn't have reasonably good resolution.
-    // This workaround is primarily targeting Windows and FreeBSD, since
-    // XSI requires the value to be 1.000.000 (10x the condition here), and
-    // we want to be very sure that at least one platform in CI can catch
-    // regressions (through a failing test).
-    return;
-  }
-#endif
 
-  clock_t baseline = 0;
   for (int i = 0; i < num_cases; ++i) {
     XML_Parser parser = XML_ParserCreate(NULL);
     assert_true(parser != NULL);
     enum XML_Status status;
-    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
-                text[i].pre, text[i].post);
-    const clock_t start = clock();
+    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
 
     // parse the start text
+    g_bytesScanned = 0;
     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
                                      (int)strlen(text[i].pre), XML_FALSE);
     if (status != XML_STATUS_OK) {
       xml_failure(parser);
     }
+
     // parse lots of 'a', failing the test early if it takes too long
+    unsigned past_max_count = 0;
     for (int f = 0; f < fillcount; ++f) {
       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
       if (status != XML_STATUS_OK) {
         xml_failure(parser);
       }
-      // i == 0 means we're still calculating the baseline value
-      if (i > 0) {
-        const clock_t now = clock();
-        const clock_t clocks_so_far = now - start;
-        const int slowdown = clocks_so_far / baseline;
-        if (slowdown >= max_slowdown) {
-          fprintf(
-              stderr,
-              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
-              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
-          fail(too_slow_failure_message);
-        }
+      if (g_bytesScanned > max_scanned) {
+        // We're not done, and have already passed the limit -- the test will
+        // definitely fail. This block allows us to save time by failing early.
+        const unsigned pushed
+            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
+        fprintf(
+            stderr,
+            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+            f + 1, fillcount, pushed, g_bytesScanned,
+            g_bytesScanned / (double)pushed, max_scanned, max_factor);
+        past_max_count++;
+        // We are failing, but allow a few log prints first. If we don't reach
+        // a count of five, the test will fail after the loop instead.
+        assert_true(past_max_count < 5);
       }
     }
+
     // parse the end text
     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
                                      (int)strlen(text[i].post), XML_TRUE);
@@ -5286,18 +5315,14 @@ START_TEST(test_big_tokens_take_linear_time) {
       xml_failure(parser);
     }
 
-    // how long did it take in total?
-    const clock_t end = clock();
-    const clock_t taken = end - start;
-    if (i == 0) {
-      assert_true(taken > 0); // just to make sure we don't div-by-0 later
-      baseline = taken;
-    }
-    const int slowdown = taken / baseline;
-    if (slowdown >= max_slowdown) {
-      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
-              (int)taken, (int)baseline, slowdown, max_slowdown);
-      fail(too_slow_failure_message);
+    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
+    if (g_bytesScanned > max_scanned) {
+      fprintf(
+          stderr,
+          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
+          max_factor);
+      fail("scanned too many bytes");
     }
 
     XML_ParserFree(parser);
@@ -5774,19 +5799,17 @@ START_TEST(test_varying_buffer_fills) {
                 fillsize[2], fillsize[3]);
     XML_Parser parser = XML_ParserCreate(NULL);
     assert_true(parser != NULL);
-    g_parseAttempts = 0;
 
     CharData storage;
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetStartElementHandler(parser, start_element_event_handler);
 
+    g_bytesScanned = 0;
     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
-    int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
     int offset = 0;
     while (*fillsize >= 0) {
       assert_true(offset + *fillsize <= document_length); // or test is invalid
-      const unsigned attempts_before = g_parseAttempts;
       const enum XML_Status status
           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
       if (status != XML_STATUS_OK) {
@@ -5796,28 +5819,20 @@ START_TEST(test_varying_buffer_fills) {
       fillsize++;
       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
       worstcase_bytes += offset; // we might've tried to parse all pending bytes
-      if (g_parseAttempts != attempts_before) {
-        assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
-        assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
-        scanned_bytes += offset; // we *did* try to parse all pending bytes
-      }
     }
     assert_true(storage.count == 1); // the big token should've been parsed
-    assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
+    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
     if (g_reparseDeferralEnabledDefault) {
       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
-      const int max_bytes_scanned = -*fillsize;
-      if (scanned_bytes > max_bytes_scanned) {
+      const unsigned max_bytes_scanned = -*fillsize;
+      if (g_bytesScanned > max_bytes_scanned) {
         fprintf(stderr,
-                "bytes scanned in parse attempts: actual=%d limit=%d \n",
-                scanned_bytes, max_bytes_scanned);
+                "bytes scanned in parse attempts: actual=%u limit=%u \n",
+                g_bytesScanned, max_bytes_scanned);
         fail("too many bytes scanned in parse attempts");
       }
-      assert_true(scanned_bytes <= worstcase_bytes);
-    } else {
-      // heuristic is disabled; every XML_Parse() will have reparsed
-      assert_true(scanned_bytes == worstcase_bytes);
     }
+    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
 
     XML_ParserFree(parser);
   }
@@ -5972,6 +5987,8 @@ make_basic_test_case(Suite *s) {
   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
   tcase_add_test__ifdef_xml_dtd(tc_basic,
                                 test_recursive_external_parameter_entity);
+  tcase_add_test__ifdef_xml_dtd(tc_basic,
+                                test_recursive_external_parameter_entity_2);
   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
   tcase_add_test(tc_basic, test_suspend_xdecl);
   tcase_add_test(tc_basic, test_abort_epilog);
@@ -6065,7 +6082,7 @@ make_basic_test_case(Suite *s) {
   tcase_add_test__ifdef_xml_dtd(tc_basic,
                                 test_pool_integrity_with_unfinished_attr);
   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
-  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
+  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
   tcase_add_test(tc_basic, test_set_reparse_deferral);
   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);