Create new dedicated data-library website
tkittel opened this issue · 2 comments
tkittel commented
As discussed with @MilanKlausz the data library page in the wiki needs a serious overhaul and upgrade (cf. also #123).
We are considering several ideas:
- Generate the DB, plot files, etc. in GitHub CI
- Make the user experience more interactive, most likely with a wonderful mix of JavaScript and various other technologies (to be investigated by Milan).
tkittel commented
For reference, here is the script we discussed today:
#!/usr/bin/env python3
import NCrystal as NC
import pathlib
def createDBEntry( filelistentry, *, plotfolder = None ):
    """Return a plain dict describing one file-list entry.

    An Entry is constructed from ``filelistentry`` (``plotfolder`` is
    forwarded unchanged, so constructing it has the same side effects as
    constructing an Entry directly) and its properties are copied into a
    dict keyed by field name.
    """
    entry = Entry( filelistentry, plotfolder = plotfolder )
    return {
        'key': entry.key,
        'shortkey': entry.shortkey,
        'safekey': entry.safekey,
        'ncmat_header': entry.ncmat_header,
        'dump': entry.dump,
        'ncmat_contents': entry.textData.rawData,
        'plot_filename_xsect': entry.plot_filename_xsect,
        # Currently always the empty string.
        'extra_keywords': '',
    }
class Entry:
    """Information collected for a single file-list entry.

    Constructing an Entry loads the material via ``NC.load``, fetches its
    text data via ``NC.createTextData``, extracts the leading header
    comments, captures the output of ``nctool -d <key>`` and saves a plot
    as a PNG file.  Everything is exposed through read-only properties.
    """

    @property
    def loaded_mat( self ):
        """Object returned by ``NC.load(key)``."""
        return self.__mat

    @property
    def key( self ):
        """The ``fullKey`` of the file-list entry."""
        return self.__key

    @property
    def shortkey( self ):
        """The key with a leading ``stdlib::`` prefix (if any) removed."""
        return self.__shortkey

    @property
    def safekey( self ):
        """The key with '/', ':' and '.' substituted (used for file names)."""
        return self.__safekey

    @property
    def filelistentry( self ):
        """The file-list entry this object was constructed from."""
        return self.__fe

    @property
    def ncmat_header( self ):
        """List of header comment lines (see ``_extract_ncmat_header``)."""
        return self.__ncmathdr

    @property
    def textData( self ):
        """Object returned by ``NC.createTextData(key)``."""
        return self.__td

    @property
    def dump( self ):
        """Decoded standard output of ``nctool -d <key>``."""
        return self.__dump

    @property
    def plot_filename_xsect( self ):
        """File name (without folder) of the saved plot."""
        return self.__plot_xsect_file

    @staticmethod
    def _extract_ncmat_header( lines ):
        """Extract the cleaned-up header comments from an iterable of lines.

        The header consists of the lines starting with '#' which appear
        before the first line starting with '@' (or all such lines, if no
        '@' line is present).  The leading '#' is removed, then common
        leading spaces are removed, and finally blank lines at the start
        and end are dropped.

        Returns a (possibly empty) list of strings.
        """
        hdr = []
        for line in lines:
            if not line:
                continue
            if line[0] == '@':
                break
            if line.startswith('#'):
                hdr.append( line[1:] )
        # Remove common leading spaces. The any(..) guard is essential: without
        # it an empty or all-blank header would loop forever, since stripping
        # a character from an empty string changes nothing.
        while any(hdr) and all( (not e or e.startswith(' ')) for e in hdr ):
            hdr = [ e[1:] for e in hdr ]
        # Trim blank lines at both ends (guarding against an empty list).
        while hdr and not hdr[0].strip():
            hdr = hdr[1:]
        while hdr and not hdr[-1].strip():
            hdr = hdr[:-1]
        return hdr

    def __init__( self, filelistentry, *, plotfolder = None ):
        """Gather all information for ``filelistentry``.

        Parameters:
          filelistentry: object providing a ``fullKey`` attribute.
          plotfolder: folder (path-like) in which to save the plot.
                      Defaults to the current working directory.

        Raises subprocess.CalledProcessError if ``nctool`` exits with a
        non-zero status.
        """
        key = filelistentry.fullKey
        prefix = 'stdlib::'
        shortkey = key[len(prefix):] if key.startswith(prefix) else key
        #Fixme: we need to check against clashes. Perhaps also better use some
        #generic urlencode function or some such?
        self.__safekey = key.replace('/','_').replace(':','_').replace('.','d')
        self.__key = key
        self.__shortkey = shortkey
        if plotfolder is None:
            plotfolder = pathlib.Path('.').absolute()
        else:
            # Accept plain strings as well as Path objects.
            plotfolder = pathlib.Path(plotfolder)
        self.__mat = NC.load(key)
        self.__fe = filelistentry
        self.__td = NC.createTextData(key)
        self.__ncmathdr = self._extract_ncmat_header( self.__td )
        import subprocess
        p = subprocess.run(['nctool','-d',key],
                           capture_output=True,check=True)
        self.__dump = p.stdout.decode()
        #Generate plots:
        self.__mat.plot(do_show=False)
        import matplotlib.pyplot as plt
        self.__plot_xsect_file = '%s.png'%self.__safekey
        plt.savefig(plotfolder.joinpath(self.__plot_xsect_file))
        # pyplot keeps figures alive until they are closed. Close the current
        # one so that figures do not pile up (or get reused) when many entries
        # are processed. NOTE(review): assumes nobody needs the figure after
        # it has been saved.
        plt.close()
def create_DB_contents( plotfolder ):
    """Create ``plotfolder`` and return the list of database records.

    Raises RuntimeError if ``plotfolder`` already exists.  Only the entries
    from ``NC.browseFiles()`` whose name starts with 'Ac', or whose full
    key contains 'gasmix::BF3', are processed.
    """
    plotfolder = pathlib.Path(plotfolder)
    if plotfolder.exists():
        raise RuntimeError(f'Plot folder already exists: {plotfolder}')
    plotfolder.mkdir(parents=True)

    def is_selected( entry ):
        # Hard-coded selection of the entries to include.
        return ( entry.name.startswith('Ac')
                 or 'gasmix::BF3' in entry.fullKey )

    records = []
    for entry in NC.browseFiles():
        if is_selected( entry ):
            print(f"Processing {entry.fullKey}")
            records.append( createDBEntry( entry, plotfolder = plotfolder ) )
    return records
def create_DB( outfolder ):
    """Generate the database in a new folder and return that folder.

    The folder ``outfolder`` is created (RuntimeError is raised if it
    already exists).  Plots go into its ``plots`` subfolder and the records
    are written to ``db.json``.  The records are also pretty-printed to
    standard output.
    """
    import json
    import pprint
    outfolder = pathlib.Path(outfolder)
    if outfolder.exists():
        raise RuntimeError(f'Folder already exists: {outfolder}')
    outfolder.mkdir(parents=True)
    records = create_DB_contents( outfolder / 'plots' )
    pprint.pprint(records)
    with ( outfolder / 'db.json' ).open('wt') as fh:
        json.dump( records, fh )
    return outfolder
# When executed as a script, generate the database in ./autogen_db (relative
# to the current working directory).
if __name__=='__main__':
    create_DB('./autogen_db')
tkittel commented
For reference, here are instructions from @marquezj for the new EXFOR interface, which we will eventually need when adding EXFOR data points.
https://nds.iaea.org/exfor/x4guide/API/
import numpy as np
import NCrystal as NC
import pandas as pd
from urllib.request import urlopen, Request
import io
import matplotlib.pyplot as plt
# EXFOR dataset IDs to download and plot.
x4ids = [11762002, 11355002]

for x4id in x4ids:
    # Fetch the dataset as CSV. The request is sent with a browser-like
    # User-Agent header.
    csv_url = f'https://nds.iaea.org/exfor/x4get?DatasetID={x4id}&op=csv'
    req = Request(
        url=csv_url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
        }
    )
    with urlopen(req) as f:
        csv_text = f.read().decode('utf-8')
    df = pd.read_csv(io.StringIO(csv_text))
    # The energy column is labelled in MeV and scaled by 1e6 to match the
    # eV axis used below. NOTE(review): column names and units may differ
    # between datasets -- verify for each dataset.
    x4_E = 1e6*df['EN (MEV) 1.1'].values
    x4_xs = df['DATA (B) 0.1'].values
    plt.loglog(x4_E, x4_xs, '.', label=f'X4: {x4id}')

# NCrystal curve: sum of the scattering and absorption cross sections.
pc_ni = NC.createScatter('Ni_sg225.ncmat')
ab_ni = NC.createAbsorption('Ni_sg225.ncmat')
E = np.geomspace(1e-4,5,1000)
xs_total = pc_ni.crossSectionNonOriented(E)+ab_ni.crossSectionNonOriented(E)
# Results are assigned to a throw-away name, as in the original snippet.
a = plt.loglog(E, xs_total, label='NCrystal')
a = plt.legend()
a = plt.xlabel('Energy [eV]')
a = plt.ylabel('Total cross section per atom [b]')
"the column selection in pandas might require some massaging... I am not sure they use the same units always"