PyMC v5.0.1 - tutorials in docs page not working from the conda environment (Windows 11)

Hello,

I have just installed the new pymc using the installation step in the docs:

conda create -c conda-forge -n pymc_env "pymc>=4"
conda activate pymc_env

I am trying to run the code in some of the tutorials in the docs page, and always get the same error for all the tutorials. Here’s the code:

import pymc as pm
print(f"Running on PyMC v{pm.__version__}") # output : Running on PyMC v5.0.1
import pandas as pd
import numpy as np

# fmt: off
disaster_data = pd.Series(
    [4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
    3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
    2, 2, 3, 4, 2, 1, 3, np.nan, 2, 1, 1, 1, 1, 3, 0, 0,
    1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
    0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
    3, 3, 1, np.nan, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
    0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]
)
# fmt: on
years = np.arange(1851, 1962)

with pm.Model() as disaster_model:

    switchpoint = pm.DiscreteUniform("switchpoint", lower=years.min(), upper=years.max())

    # Priors for pre- and post-switch rates number of disasters
    early_rate = pm.Exponential("early_rate", 1.0)
    late_rate = pm.Exponential("late_rate", 1.0)

    # Allocate appropriate Poisson rates to years before and after current
    rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

    disasters = pm.Poisson("disasters", rate, observed=disaster_data)

with disaster_model:
    idata = pm.sample(10000)

And the error message I get is the following:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[13], line 2
      1 with disaster_model:
----> 2     idata = pm.sample(10000, chains = (2))

File ~\anaconda3\envs\new_pymc\Lib\site-packages\pymc\sampling\mcmc.py:529, in sample(draws, step, init, n_init, initvals, trace, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, keep_warning_stat, idata_kwargs, mp_ctx, **kwargs)
    527 _print_step_hierarchy(step)
    528 try:
--> 529     mtrace = _mp_sample(**sample_args, **parallel_args)
    530 except pickle.PickleError:
    531     _log.warning("Could not pickle model, sampling singlethreaded.")

File ~\anaconda3\envs\new_pymc\Lib\site-packages\pymc\sampling\mcmc.py:1005, in _mp_sample(draws, tune, step, chains, cores, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, **kwargs)
    992 draws -= tune
    994 traces = [
    995     _init_trace(
    996         expected_length=draws + tune,
   (...)
   1002     for chain_number in range(chains)
   1003 ]
-> 1005 sampler = ps.ParallelSampler(
   1006     draws=draws,
   1007     tune=tune,
   1008     chains=chains,
   1009     cores=cores,
   1010     seeds=random_seed,
   1011     start_points=start,
   1012     step_method=step,
   1013     progressbar=progressbar,
   1014     mp_ctx=mp_ctx,
   1015 )
   1016 try:
   1017     try:

File ~\anaconda3\envs\new_pymc\Lib\site-packages\pymc\sampling\parallel.py:401, in ParallelSampler.__init__(self, draws, tune, chains, cores, seeds, start_points, step_method, progressbar, mp_ctx)
    399 step_method_pickled = None
    400 if mp_ctx.get_start_method() != "fork":
--> 401     step_method_pickled = cloudpickle.dumps(step_method, protocol=-1)
    403 self._samplers = [
    404     ProcessAdapter(
    405         draws,
   (...)
    414     for chain, seed, start in zip(range(chains), seeds, start_points)
    415 ]
    417 self._inactive = self._samplers.copy()

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:73, in dumps(obj, protocol, buffer_callback)
     69 with io.BytesIO() as file:
     70     cp = CloudPickler(
     71         file, protocol=protocol, buffer_callback=buffer_callback
     72     )
---> 73     cp.dump(obj)
     74     return file.getvalue()

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:602, in CloudPickler.dump(self, obj)
    600 def dump(self, obj):
    601     try:
--> 602         return Pickler.dump(self, obj)
    603     except RuntimeError as e:
    604         if "recursion" in e.args[0]:

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:692, in CloudPickler.reducer_override(self, obj)
    690     return _class_reduce(obj)
    691 elif isinstance(obj, types.FunctionType):
--> 692     return self._function_reduce(obj)
    693 else:
    694     # fallback to save_global, including the Pickler's
    695     # dispatch_table
    696     return NotImplemented

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:565, in CloudPickler._function_reduce(self, obj)
    563     return NotImplemented
    564 else:
--> 565     return self._dynamic_function_reduce(obj)

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:546, in CloudPickler._dynamic_function_reduce(self, func)
    544 """Reduce a function that is not pickleable via attribute lookup."""
    545 newargs = self._function_getnewargs(func)
--> 546 state = _function_getstate(func)
    547 return (types.FunctionType, newargs, state, None, None,
    548         _function_setstate)

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle_fast.py:157, in _function_getstate(func)
    139 def _function_getstate(func):
    140     # - Put func's dynamic attributes (stored in func.__dict__) in state. These
    141     #   attributes will be restored at unpickling time using
   (...)
    144     #   unpickling time by iterating over slotstate and calling setattr(func,
    145     #   slotname, slotvalue)
    146     slotstate = {
    147         "__name__": func.__name__,
    148         "__qualname__": func.__qualname__,
   (...)
    154         "__closure__": func.__closure__,
    155     }
--> 157     f_globals_ref = _extract_code_globals(func.__code__)
    158     f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in
    159                  func.__globals__}
    161     closure_values = (
    162         list(map(_get_cell_contents, func.__closure__))
    163         if func.__closure__ is not None else ()
    164     )

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle.py:334, in _extract_code_globals(co)
    330 names = co.co_names
    331 # We use a dict with None values instead of a set to get a
    332 # deterministic order (assuming Python 3.6+) and avoid introducing
    333 # non-deterministic pickle bytes as a results.
--> 334 out_names = {names[oparg]: None for _, oparg in _walk_global_ops(co)}
    336 # Declaring a function inside another one using the "def ..."
    337 # syntax generates a constant code object corresponding to the one
    338 # of the nested function's As the nested function may itself need
    339 # global variables, we need to introspect its code, extract its
    340 # globals, (look for code object in it's co_consts attribute..) and
    341 # add the result to code_globals
    342 if co.co_consts:

File ~\anaconda3\envs\new_pymc\Lib\site-packages\cloudpickle\cloudpickle.py:334, in <dictcomp>(.0)
    330 names = co.co_names
    331 # We use a dict with None values instead of a set to get a
    332 # deterministic order (assuming Python 3.6+) and avoid introducing
    333 # non-deterministic pickle bytes as a results.
--> 334 out_names = {names[oparg]: None for _, oparg in _walk_global_ops(co)}
    336 # Declaring a function inside another one using the "def ..."
    337 # syntax generates a constant code object corresponding to the one
    338 # of the nested function's As the nested function may itself need
    339 # global variables, we need to introspect its code, extract its
    340 # globals, (look for code object in it's co_consts attribute..) and
    341 # add the result to code_globals
    342 if co.co_consts:

IndexError: tuple index out of range

The same error happens when I run other examples as well… I have no clue where this is coming from. Also tried a fresh install twice but no luck.

Thanks.

Welcome!

A couple of questions. I assume you are on Windows. Did you have a previous version of PyMC installed where you were not getting these errors? What happens if you add chains=1 to the call to pm.sample()?

Hi, thank you so much for getting back to me.

Yes, I am running on Windows 11, and have not had a prior PyMC version that didn’t show these errors on this specific machine.
Chains = 1 does not throw the error, and it is able to sample.
Chains = 2 (or 3, 4, 5) still throws the error.

Thanks again for your help.

It looks like you may be running into one of the issues discussed here. I would start by wrapping your script in a function and wrapping a call to that function in an if __name__ == '__main__': statement. If one of the solutions discussed there works for you, great. If not, let us know and/or you can open a new issue (as you can see, that issue was closed because it turned into a combination of several different issues).

Hi,

Thanks for the suggestion,
Just wrapped it in the if __name__ == '__main__': statement, like so:

import pymc as pm
import pandas as pd
import numpy as np

print('*** Start script ***')

if __name__ == '__main__':

    disaster_data = pd.Series(
    [4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
    3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
    2, 2, 3, 4, 2, 1, 3, np.nan, 2, 1, 1, 1, 1, 3, 0, 0,
    1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
    0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
    3, 3, 1, np.nan, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
    0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])

    years = np.arange(1851, 1962)

    with pm.Model() as disaster_model:

        switchpoint = pm.DiscreteUniform("switchpoint", lower=years.min(), upper=years.max())

        early_rate = pm.Exponential("early_rate", 1.0)
        late_rate = pm.Exponential("late_rate", 1.0)

        # Allocate appropriate Poisson rates to years before and after current
        rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

        disasters = pm.Poisson("disasters", rate, observed=disaster_data)

    with disaster_model:
        idata = pm.sample(10000)

and I get the same error as above.
For chains=1, it is able to sample;
for chains>1, I get the same error as above.

for any number of cores, and chains=1 it samples.
for any number of cores, and chains>1 it does not sample.

Is there anything I could do on Windows to reset any settings that might be causing this, or something along those lines? Thank you

Hello again,

Just wanted to leave this here, I stumbled across this post:

and realized that in my previous environments, I always install jupyter with pip install jupyter, after installing PyMC.

However, installing notebook from conda-forge seems to solve the issue that I described above, with:

conda create -c conda-forge -n pymc5 "pymc>=4" python=3.11
conda activate pymc5
conda install -c conda-forge notebook

Now I am able to run the examples smoothly.

1 Like

Great to hear!