diff --git a/IPython/testing/iptest.py b/IPython/testing/iptest.py
index 829f797..2c5070a 100644
--- a/IPython/testing/iptest.py
+++ b/IPython/testing/iptest.py
@@ -236,6 +236,11 @@ def make_exclude():
         exclusions.append(ipjoin('core', 'history'))
     if not have['wx']:
         exclusions.append(ipjoin('lib', 'inputhookwx'))
+    
+    # FIXME: temporarily exclude the autoreload extension and its tests, as
+    # running them can produce spurious failures in later tests (cythonmagic).
+    exclusions.append(ipjoin('extensions', 'autoreload'))
+    exclusions.append(ipjoin('extensions', 'tests', 'test_autoreload'))
 
     # We do this unconditionally, so that the test suite doesn't import
     # gtk, changing the default encoding and masking some unicode bugs.