JessicaTegner/pypandoc

Three tests failing with pypandoc 3.11 & pandoc 3 on openSUSE Tumbleweed

jayvdb opened this issue · 5 comments

jayvdb commented

It looks like

  1. pandoc 3 isn't supported yet,
  2. some error regarding data/sample.lua which could be another pandoc 3 thing or could be an openSUSE packaging thing, and
  3. test_depreaction_warnings looks very odd.

I will investigate the 2nd and 3rd of those first, and report back.
It would be good to know the status and/or direction of pandoc 3 support.

[  205s] =================================== FAILURES ===================================
[  205s] _________________ TestPypandoc.test_convert_with_custom_writer _________________
[  205s] 
[  205s] self = <tests.TestPypandoc testMethod=test_convert_with_custom_writer>
[  205s] 
[  205s]     def test_convert_with_custom_writer(self):
[  205s]         lua_file_content = self.create_sample_lua()
[  205s]         with closed_tempfile('.md', text='# title\n') as file_name:
[  205s]             with closed_tempfile('.lua', text=lua_file_content, dir_name="foo-bar+baz") as lua_file_name:
[  205s]                 expected = u'<h1 id="title">title</h1>{0}'.format(os.linesep)
[  205s] >               received = pypandoc.convert_file(file_name, lua_file_name)
[  205s] 
[  205s] tests.py:194: 
[  205s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[  205s] pypandoc/__init__.py:168: in convert_file
[  205s]     return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
[  205s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[  205s] 
[  205s] source = '/tmp/tmpb5pe4xtp.md', format = 'markdown', input_type = 'path'
[  205s] to = '/tmp/tmpvvl8yo4sfoo-bar+baz/tmpyp6mx4fu.lua', extra_args = ()
[  205s] outputfile = None, filters = None, verify_format = True, sandbox = False
[  205s] cworkdir = None
[  205s] 
[  205s]     def _convert_input(source, format, input_type, to, extra_args=(),
[  205s]                        outputfile=None, filters=None, verify_format=True,
[  205s]                        sandbox=False, cworkdir=None):
[  205s]     
[  205s]         _check_log_handler()
[  205s]     
[  205s]         logger.debug("Ensuring pandoc path...")
[  205s]         _ensure_pandoc_path()
[  205s]     
[  205s]         if verify_format:
[  205s]             logger.debug("Verifying format...")
[  205s]             format, to = _validate_formats(format, to, outputfile)
[  205s]         else:
[  205s]             format = normalize_format(format)
[  205s]             to = normalize_format(to)
[  205s]     
[  205s]         logger.debug("Identifying input type...")
[  205s]         string_input = input_type == 'string'
[  205s]         if not string_input:
[  205s]             if isinstance(source, str):
[  205s]                 input_file = [source]
[  205s]             else:
[  205s]                 input_file = source
[  205s]         else:
[  205s]             input_file = []
[  205s]     
[  205s]         input_file = sorted(input_file)
[  205s]         args = [__pandoc_path, '--from=' + format]
[  205s]     
[  205s]         args.append('--to=' + to)
[  205s]     
[  205s]         args += input_file
[  205s]     
[  205s]         if outputfile:
[  205s]             args.append("--output=" + str(outputfile))
[  205s]     
[  205s]         if sandbox:
[  205s]             if ensure_pandoc_minimal_version(2,15): # sandbox was introduced in pandoc 2.15, so only add if we are using 2.15 or above.
[  205s]                 logger.debug("Adding sandbox argument...")
[  205s]                 args.append("--sandbox")
[  205s]             else:
[  205s]                 logger.warning("Sandbox argument was used, but pandoc version is too low. Ignoring argument.")
[  205s]     
[  205s]         args.extend(extra_args)
[  205s]     
[  205s]         # adds the proper filter syntax for each item in the filters list
[  205s]         if filters is not None:
[  205s]             if isinstance(filters, string_types):
[  205s]                 filters = filters.split()
[  205s]             f = ['--lua-filter=' + x if x.endswith(".lua") else '--filter=' + x for x in filters]
[  205s]             args.extend(f)
[  205s]     
[  205s]         # To get access to pandoc-citeproc when we use a included copy of pandoc,
[  205s]         # we need to add the pypandoc/files dir to the PATH
[  205s]         new_env = os.environ.copy()
[  205s]         files_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files")
[  205s]         new_env["PATH"] = new_env.get("PATH", "") + os.pathsep + files_path
[  205s]         creation_flag = 0x08000000 if sys.platform == "win32" else 0 # set creation flag to not open pandoc in new console on windows
[  205s]     
[  205s]         old_wd = os.getcwd()
[  205s]         if cworkdir and old_wd != cworkdir:
[  205s]             os.chdir(cworkdir)
[  205s]     
[  205s]         logger.debug("Running pandoc...")
[  205s]         p = subprocess.Popen(
[  205s]             args,
[  205s]             stdin=subprocess.PIPE if string_input else None,
[  205s]             stdout=subprocess.PIPE,
[  205s]             stderr=subprocess.PIPE,
[  205s]             env=new_env,
[  205s]             creationflags=creation_flag)
[  205s]     
[  205s]         if cworkdir is not None:
[  205s]             os.chdir(old_wd)
[  205s]     
[  205s]         # something else than 'None' indicates that the process already terminated
[  205s]         if not (p.returncode is None):
[  205s]             raise RuntimeError(
[  205s]                 'Pandoc died with exitcode "%s" before receiving input: %s' % (p.returncode,
[  205s]                                                                                p.stderr.read())
[  205s]             )
[  205s]     
[  205s]         if string_input:
[  205s]             try:
[  205s]                 source = cast_bytes(source, encoding='utf-8')
[  205s]             except (UnicodeDecodeError, UnicodeEncodeError):
[  205s]                 # assume that it is already a utf-8 encoded string
[  205s]                 pass
[  205s]         try:
[  205s]             stdout, stderr = p.communicate(source if string_input else None)
[  205s]         except OSError:
[  205s]             # this is happening only on Py2.6 when pandoc dies before reading all
[  205s]             # the input. We treat that the same as when we exit with an error...
[  205s]             raise RuntimeError('Pandoc died with exitcode "%s" during conversion.' % (p.returncode))
[  205s]     
[  205s]         try:
[  205s]             stdout = stdout.decode('utf-8')
[  205s]         except UnicodeDecodeError:
[  205s]             # this shouldn't happen: pandoc more or less guarantees that the output is utf-8!
[  205s]             raise RuntimeError('Pandoc output was not utf-8.')
[  205s]     
[  205s]         try:
[  205s]             stderr = stderr.decode('utf-8')
[  205s]         except UnicodeDecodeError:
[  205s]             # this shouldn't happen: pandoc more or less guarantees that the output is utf-8!
[  205s]             raise RuntimeError('Pandoc output was not utf-8.')
[  205s]     
[  205s]         # check that pandoc returned successfully
[  205s]         if p.returncode != 0:
[  205s] >           raise RuntimeError(
[  205s]                 'Pandoc died with exitcode "%s" during conversion: %s' % (p.returncode, stderr)
[  205s]             )
[  205s] E           RuntimeError: Pandoc died with exitcode "4" during conversion: /tmp/tmpvvl8yo4sfoo-bar+baz/tmpyp6mx4fu.lua does not contain a custom writer
[  205s] 
[  205s] pypandoc/__init__.py:426: RuntimeError
[  205s] ----------------------------- Captured stdout call -----------------------------
[  205s] /home/abuild
[  205s] ----------------------------- Captured stderr call -----------------------------
[  205s] Could not find data file /usr/share/pandoc-3.1.2/data/sample.lua
[  205s] ____________________ TestPypandoc.test_depreaction_warnings ____________________
[  205s] 
[  205s] self = <tests.TestPypandoc testMethod=test_depreaction_warnings>
[  205s] 
[  205s]     def test_depreaction_warnings(self):
[  205s]         # convert itself is deprecated...
[  205s]         with assert_produces_warning(DeprecationWarning):
[  205s] >           pypandoc.convert('# some title\n', to='rst', format='md')
[  205s] E           AttributeError: module 'pypandoc' has no attribute 'convert'
[  205s] 
[  205s] tests.py:378: AttributeError
[  205s] ----------------------------- Captured stdout call -----------------------------
[  205s] /home/abuild
[  205s] _____________________ TestPypandoc.test_get_pandoc_version _____________________
[  205s] 
[  205s] self = <tests.TestPypandoc testMethod=test_get_pandoc_version>
[  205s] 
[  205s]     def test_get_pandoc_version(self):
[  205s]         assert "HOME" in os.environ, "No HOME set, this will error..."
[  205s]         version = pypandoc.get_pandoc_version()
[  205s]         self.assertTrue(isinstance(version, pypandoc.string_types))
[  205s]         major = int(version.split(".")[0])
[  205s]         # according to http://pandoc.org/releases.html there were only two versions 0.x ...
[  205s] >       self.assertTrue(major in [0, 1, 2])
[  205s] E       AssertionError: False is not true
[  205s] 
[  205s] tests.py:146: AssertionError
[  205s] ----------------------------- Captured stdout call -----------------------------
[  205s] /home/abuild

Hi.

So to take them in order.

  1. There is rudimentary pandoc 3 support at the moment.
  2. Most likely only fails with pandoc 3.x because of the new way pandoc 3 handles Lua filters.
  3. Most likely unrelated to pandoc 3 support. Could you go into more details?

@jayvdb do you have any updates?

This also affects us now, we ship pandoc 3.1.6.1:

test_basic_conversion_from_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_pathlib (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_pattern (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_pattern_pathlib_glob (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_pattern_with_input_list (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_pattern_with_input_list_pathlib_glob (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_url (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_file_with_format (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_http_url (tests.TestPypandoc) ... skipped 'no network access during checkPhase'
test_basic_conversion_from_multiple_files (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_multiple_files_pathlib (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_from_string (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_to_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_basic_conversion_to_pathlib_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_call_with_nonexisting_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_classify_pandoc_logging (tests.TestPypandoc) ... /homeless-shelter
ok
test_classify_pandoc_logging_default (tests.TestPypandoc) ... /homeless-shelter
ok
test_classify_pandoc_logging_invalid_level (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_error (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_from_markdown_with_extensions (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_from_non_plain_text_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_stderr (tests.TestPypandoc) ... /homeless-shelter
Could not fetch resource missing.png: replacing image with description
Could not fetch resource missing.png: replacing image with description

FAIL
test_conversion_stderr_nullhandler (tests.TestPypandoc) ... /homeless-shelter
Could not fetch resource missing.png: replacing image with description
Could not fetch resource missing.png: replacing image with description

ok
test_conversion_with_empty_filter (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_with_lua_filter (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_with_markdown_extensions (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_with_mixed_filters (tests.TestPypandoc) ... /homeless-shelter
ok
test_conversion_with_python_filter (tests.TestPypandoc) ... /homeless-shelter
ok
test_convert_text_with_existing_file (tests.TestPypandoc) ... /homeless-shelter
ok
test_convert_with_custom_writer (tests.TestPypandoc) ... /homeless-shelter
Could not find data file /nix/store/7vhv1dd2pbd5f0lpgsg81v4sd5nzl303-pandoc-3.1.6.1-data/share/ghc-9.4.6/x86_64-linux-ghc-9.4.6/pandoc-3.1.6.1/data/sample.lua
ERROR
test_converts_valid_format (tests.TestPypandoc) ... /homeless-shelter
ok
test_does_not_convert_from_invalid_format (tests.TestPypandoc) ... /homeless-shelter
ok
test_does_not_convert_to_invalid_format (tests.TestPypandoc) ... /homeless-shelter
ok
test_ensure_pandoc_maximal_version (tests.TestPypandoc) ... /homeless-shelter
FAIL
test_ensure_pandoc_minimal_version (tests.TestPypandoc) ... /homeless-shelter
FAIL
test_get_pandoc_formats (tests.TestPypandoc) ... /homeless-shelter
ok
test_get_pandoc_path (tests.TestPypandoc) ... /homeless-shelter
ok
test_get_pandoc_version (tests.TestPypandoc) ... /homeless-shelter
ok
test_pdf_conversion (tests.TestPypandoc) ... /homeless-shelter
ok
test_unicode_input (tests.TestPypandoc) ... /homeless-shelter
ok

======================================================================
ERROR: test_convert_with_custom_writer (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 241, in test_convert_with_custom_writer
    received = pypandoc.convert_file(file_name, lua_file_name)
  File "/build/source/pypandoc/__init__.py", line 170, in convert_file
    return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
  File "/build/source/pypandoc/__init__.py", line 420, in _convert_input
    raise RuntimeError(
RuntimeError: Pandoc died with exitcode "4" during conversion: /build/tmpj012jozhfoo-bar+baz/tmpa7nmnv9r.lua does not contain a custom writer


======================================================================
FAIL: test_conversion_stderr (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 462, in test_conversion_stderr
    self.assertEqual(expected, output)
AssertionError: '[WAR[39 chars]png: PandocResourceNotFound "missing.png"\n[WA[81 chars]\n\n' != '[WAR[39 chars]png: replacing image with description\n[WARNIN[73 chars]\n\n'
- [WARNING] Could not fetch resource missing.png: PandocResourceNotFound "missing.png"
- [WARNING] Could not fetch resource missing.png: PandocResourceNotFound "missing.png"
+ [WARNING] Could not fetch resource missing.png: replacing image with description
+ [WARNING] Could not fetch resource missing.png: replacing image with description



======================================================================
FAIL: test_ensure_pandoc_maximal_version (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 173, in test_ensure_pandoc_maximal_version
    assert pypandoc.ensure_pandoc_maximal_version(1,1) == False
AssertionError

======================================================================
FAIL: test_ensure_pandoc_minimal_version (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 165, in test_ensure_pandoc_minimal_version
    assert pypandoc.ensure_pandoc_minimal_version(1) == True
AssertionError

----------------------------------------------------------------------
Ran 40 tests in 7.061s

FAILED (failures=3, errors=1, skipped=1)
Test failed: <unittest.runner.TextTestResult run=40 errors=1 failures=3>
  • test_conversion_stderr: it seems it was wrong to assume stability in the warning formatting.
  • Not sure what is going on with the version asserts yet; we do patch in a version statically. That's our bad.
  • test_convert_with_custom_writer: it seems something did indeed change with Lua writers.

Taking care of false negatives gives us:

======================================================================
ERROR: test_conversion_from_non_plain_text_file (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 527, in test_conversion_from_non_plain_text_file
    received = pypandoc.convert_text('# some title\n', to='docx', format='md', outputfile=file_name)
  File "/build/source/pypandoc/__init__.py", line 93, in convert_text
    return _convert_input(source, format, 'string', to, extra_args=extra_args,
  File "/build/source/pypandoc/__init__.py", line 420, in _convert_input
    raise RuntimeError(
RuntimeError: Pandoc died with exitcode "97" during conversion: Could not find data file data/data/docx/[Content_Types].xml


======================================================================
ERROR: test_conversion_stderr (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 448, in test_conversion_stderr
    with capture(pypandoc.convert_text,
  File "/nix/store/2c7sgx69p6mmp76cvmi5j6c72dj76jj8-python3-3.10.12/lib/python3.10/contextlib.py", line 135, in __enter__
    return next(self.gen)
  File "/build/source/tests.py", line 26, in capture
    command(*args, **kwargs)
  File "/build/source/pypandoc/__init__.py", line 93, in convert_text
    return _convert_input(source, format, 'string', to, extra_args=extra_args,
  File "/build/source/pypandoc/__init__.py", line 420, in _convert_input
    raise RuntimeError(
RuntimeError: Pandoc died with exitcode "97" during conversion: Could not find data file data/data/docx/[Content_Types].xml


======================================================================
ERROR: test_conversion_stderr_nullhandler (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 474, in test_conversion_stderr_nullhandler
    with capture(pypandoc.convert_text,
  File "/nix/store/2c7sgx69p6mmp76cvmi5j6c72dj76jj8-python3-3.10.12/lib/python3.10/contextlib.py", line 135, in __enter__
    return next(self.gen)
  File "/build/source/tests.py", line 26, in capture
    command(*args, **kwargs)
  File "/build/source/pypandoc/__init__.py", line 93, in convert_text
    return _convert_input(source, format, 'string', to, extra_args=extra_args,
  File "/build/source/pypandoc/__init__.py", line 420, in _convert_input
    raise RuntimeError(
RuntimeError: Pandoc died with exitcode "97" during conversion: Could not find data file data/data/docx/[Content_Types].xml


======================================================================
ERROR: test_convert_with_custom_writer (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 241, in test_convert_with_custom_writer
    received = pypandoc.convert_file(file_name, lua_file_name)
  File "/build/source/pypandoc/__init__.py", line 170, in convert_file
    return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
  File "/build/source/pypandoc/__init__.py", line 420, in _convert_input
    raise RuntimeError(
RuntimeError: Pandoc died with exitcode "4" during conversion: /build/tmp8kps39kkfoo-bar+baz/tmp0zongnz4.lua does not contain a custom writer


======================================================================
FAIL: test_get_pandoc_version (tests.TestPypandoc)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/build/source/tests.py", line 161, in test_get_pandoc_version
    self.assertTrue(major in [0, 1, 2])
AssertionError: False is not true

----------------------------------------------------------------------
Ran 40 tests in 5.526s

FAILED (failures=1, errors=4, skipped=1)
  • test_get_pandoc_version is easy enough to fix.
  • test_convert_with_custom_writer: the Lua handling was changed in pandoc 3.x and later.