sdata package¶
Subpackages¶
Submodules¶
sdata.blob module¶
-
class
sdata.blob.
Blob
(**kwargs)[source]¶ Bases:
sdata.data.Data
Binary Large Object as reference
Warning
highly experimental
-
VAULT_TYPES
= ['filesystem', 'hdf5', 'db', 'www']¶
-
exists
(vault='filesystem')[source]¶ Test whether an object under the blob.url exists.
- Parameters
vault –
- Returns
-
property
md5
¶ calculate the md5 hash of the blob
- Returns
md5
-
property
sha1
¶ calculate the sha1 hash of the blob
- Returns
sha1
-
update_hash
(fh, hashobject, buffer_size=65536)[source]¶ A hash represents the object used to calculate a checksum of a string of information.
hashobject = hashlib.md5() df = pd.DataFrame([1,2,3]) url = "/tmp/blob.csv" df.to_csv(url) blob = sdata.Blob(url=url) fh = open(url, "rb") blob.update_hash(fh, hashobject) hashobject.hexdigest()
- Parameters
fh – file handle
hashobject – hash object, e.g. hashlib.sha1()
buffer_size – buffer size (default buffer_size=65536)
- Returns
hashobject
-
property
url
¶ url of the blob
-
sdata.data module¶
-
class
sdata.data.
Data
(**kwargs)[source]¶ Bases:
object
Base sdata object
-
ATTR_NAMES
= []¶
-
SDATA_CLASS
= '!sdata_class'¶
-
SDATA_CTIME
= '!sdata_ctime'¶
-
SDATA_MTIME
= '!sdata_mtime'¶
-
SDATA_NAME
= '!sdata_name'¶
-
SDATA_PARENT
= '!sdata_parent'¶
-
SDATA_PROJECT
= '!sdata_project'¶
-
SDATA_UUID
= '!sdata_uuid'¶
-
SDATA_VERSION
= '!sdata_version'¶
-
property
asciiname
¶
-
static
clear_folder
(path)[source]¶ delete subfolder in export folder
- Parameters
path – path
- Returns
None
-
copy
()[source]¶ create a copy of the Data object
data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable") datac = data.copy() print("data {0.uuid}".format(data)) print("datac {0.uuid}".format(datac)) print("datac.metadata['!sdata_parent'] {0.value}".format(datac.metadata["sdata_parent"]))
data 38b26864e7794f5182d38459bab85842 datac 2c4eb15900af435d8cd9c8573ca777e2 datac.metadata['!sdata_parent'] 38b26864e7794f5182d38459bab85842
- Returns
Data
-
describe
()[source]¶ Generate descriptive info of the data
df = pd.DataFrame([1,2,3]) data = sdata.Data(name='my name', uuid='38b26864e7794f5182d38459bab85842', table=df, description="A remarkable description") data.describe()
0 metadata 3 table_rows 3 table_columns 1 description 24
- Returns
pd.DataFrame
-
property
description
¶ description of the object
-
property
df
¶ table object (pandas.DataFrame)
-
property
filename
¶
-
classmethod
from_csv
(s=None, filepath=None, sep=';')[source]¶ import sdata.Data from csv
- Parameters
s – csv str
filepath –
sep – separator (default=”;”)
- Returns
sdata.Data
-
classmethod
from_hdf5
(filepath, **kwargs)[source]¶ import sdata.Data from hdf5
- Parameters
filepath –
sep – separator (default=”;”)
- Returns
sdata.Data
-
classmethod
from_json
(s=None, filepath=None)[source]¶ create Data from json str or file
- Parameters
s – json str
filepath –
- Returns
sdata.Data
-
classmethod
from_url
(url=None, stype=None)[source]¶ create Data from a url
- Parameters
url – url
stype – “json” (“xlsx”, “csv”)
- Returns
sdata.Data
-
get_download_link
()[source]¶ Generates a link allowing the data in a given pandas DataFrame to be downloaded in: dataframe out: href string
-
property
group
¶ get group
-
property
name
¶ name of the object
-
property
osname
¶ - Returns
os compatible name (ascii?)
-
property
prefix
¶ prefix of the object name
-
property
project
¶ name of the project
-
refactor
(fix_columns=True, add_table_metadata=True)[source]¶ helper function
to cleanup dataframe column name
to define Attributes for all dataframe columns
-
property
sha3_256
¶ Return a SHA3 hash of the sData object with a hashbit length of 32 bytes.
sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'
- Returns
hashlib.sha3_256.hexdigest()
-
property
sha3_256_table
¶ Return a SHA3 hash of the sData.table object with a hashbit length of 32 bytes.
sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256_table 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'
- Returns
hashlib.sha3_256.hexdigest()
-
property
table
¶ table object (pandas.DataFrame)
-
to_hdf5
(filepath, **kwargs)[source]¶ export sdata.Data to hdf5
- Parameters
filepath –
complib – default=’zlib’ [‘zlib’, ‘lzo’, ‘bzip2’, ‘blosc’, ‘blosc:blosclz’, ‘blosc:lz4’, ‘blosc:lz4hc’, ‘blosc:snappy’, ‘blosc:zlib’, ‘blosc:zstd’]
complevel – default=9 [0-9]
- Returns
-
to_html
(filepath, xlsx=True, style=None)[source]¶ export Data to html
- Parameters
filepath –
xlsx –
style –
- Returns
-
to_json
(filepath=None)[source]¶ export Data in json format
- Parameters
filepath – export file path (default:None)
- Returns
json str
-
tree_folder
(dir, padding=' ', print_files=True, hidden_files=False, last=True)[source]¶ print tree folder structure
-
update_hash
(hashobject)[source]¶ A hash represents the object used to calculate a checksum of a string of information.
data = sdata.Data() md5 = hashlib.md5() data.update_hash(md5) md5.hexdigest() 'bbf323bdcb0bf961803b5504a8a60d69' sha1 = hashlib.sha1() data.update_hash(sha1) sha1.hexdigest() '3c59368c7735c1ecaf03ebd4c595bb6e73e90f0c' hashobject = hashlib.sha3_256() data.update_hash(hashobject).hexdigest() 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa' data.update_hash(hashobject).digest() b'M8...'
- Parameters
hash – hash object, e.g. hashlib.sha1()
- Returns
hash
-
property
uuid
¶ uuid of the object
-
-
class
sdata.data.
Schema
(**kwargs)[source]¶ Bases:
sdata.data.Data
Base sdata object
-
ATTR_NAMES
= []¶
-
sdata.metadata module¶
-
class
sdata.metadata.
Attribute
(name, value, **kwargs)[source]¶ Bases:
object
Attribute class
-
DTYPES
= {'bool': <class 'bool'>, 'float': <class 'float'>, 'int': <class 'int'>, 'str': <class 'str'>, 'timestamp': <class 'sdata.timestamp.TimeStamp'>}¶
-
property
description
¶ Attribute description
-
property
dtype
¶ Attribute type str
-
property
label
¶ Attribute label
-
property
name
¶ Attribute name
-
property
required
¶ Attribute required
-
to_csv
(prefix='', sep=',', quote=None)[source]¶ export Attribute to csv
- Parameters
prefix –
sep –
quote –
- Returns
-
property
unit
¶ Attribute unit
-
property
value
¶ Attribute value
-
-
class
sdata.metadata.
Metadata
(**kwargs)[source]¶ Bases:
object
Metadata container class
- each Metadata entry has a
name (256)
value
unit
description
type (int, str, float, bool, timestamp)
-
ATTRIBUTEKEYS
= ['name', 'value', 'dtype', 'unit', 'description', 'label', 'required']¶
-
property
attributes
¶ returns Attributes
-
property
df
¶ create dataframe
-
classmethod
from_json
(jsonstr=None, filepath=None)[source]¶ create metadata from json file
- Parameters
jsonstr – json str
filepath – filepath to json file
- Returns
Metadata
-
classmethod
from_list
(mlist)[source]¶ create metadata from a list of Attribute values
- [[‘force_x’, 1.2, ‘float’, ‘kN’, ‘force in x-direction’],
[‘force_y’, 3.1, ‘float’, ‘N’, ‘force in y-direction’]]
-
static
guess_dtype_from_value
(value)[source]¶ guess dtype from value, e.g. ‘1.23’ -> ‘float’ ‘otto1.23’ -> ‘str’ 1 -> ‘int’ False -> ‘bool’
- Parameters
value –
- Returns
dtype(value), dtype [‘int’, ‘float’, ‘bool’, ‘str’]
-
property
name
¶ Name of the Metadata
-
relabel
(name, newname)[source]¶ relabel Attribute
- Parameters
name – old attribute name
newname – new attribute name
- Returns
None
-
property
required_attributes
¶
-
property
sdata_attributes
¶
-
set_unit_from_name
(add_description=True, fix_name=True)[source]¶ try to extract unit from attribute name
- Returns
-
property
sha3_256
¶ Return a new SHA3 hash object with a hashbit length of 32 bytes.
- Returns
hashlib.sha3_256.hexdigest()
-
property
size
¶ return number of Attributes
-
update_hash
(hashobject)[source]¶ A hash represents the object used to calculate a checksum of a string of information.
hashobject = hashlib.sha3_256() metadata = Metadata() metadata.update_hash(hashobject) hash.hexdigest()
- Parameters
hash – hash object
- Returns
hash_function().hexdigest()
-
property
user_attributes
¶
-
property
user_df
¶ create dataframe for user attributes
-
sdata.metadata.
extract_name_unit
(value)[source]¶ extract name and unit from a combined string
value: 'Target Strain Rate (1/s) ' name : 'Target Strain Rate' unit : '1/s' value: 'Gauge Length [mm] monkey ' name : 'Gauge Length' unit : 'mm' value: 'Gauge Length <mm> whatever ' name : 'Gauge Length' unit : 'mm'
- Parameters
value – string, e.g. ‘Length <mm> whatever’
- Returns
name, unit
sdata.timestamp module¶
ISO 8601 date time string parsing
Basic usage:
>>> parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.Utc …>)
MIT License
Copyright (c) 2007 - 2015 Michael Twomey
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
exception
sdata.timestamp.
ParseError
[source]¶ Bases:
Exception
Raised when there is a problem parsing a date string
-
class
sdata.timestamp.
TimeStamp
(datetimestr=None)[source]¶ Bases:
object
2017-04-26T09:04:00.660000+00:00
-
property
local
¶ returns the datetime isoformat string for the local timezone :returns str
-
property
utc
¶ returns the utc.isoformat string :returns str
-
property
-
sdata.timestamp.
parse_date
(datestring, default_timezone=datetime.timezone.utc)[source]¶ Parses ISO 8601 dates into datetime objects
The timezone is parsed from the date string. However it is quite common to have dates without a timezone (not strictly correct). In this case the default timezone specified in default_timezone is used. This is UTC by default.
- Parameters
datestring – The date to parse as a string
default_timezone – A datetime tzinfo instance to use when no timezone is specified in the datestring. If this is set to None then a naive datetime object is returned.
- Returns
A datetime.datetime instance
- Raises
ParseError when there is a problem parsing the date or constructing the datetime instance.
sdata.tools module¶
Module contents¶
-
class
sdata.
Blob
(**kwargs)[source]¶ Bases:
sdata.data.Data
Binary Large Object as reference
Warning
highly experimental
-
VAULT_TYPES
= ['filesystem', 'hdf5', 'db', 'www']¶
-
exists
(vault='filesystem')[source]¶ Test whether an object under the blob.url exists.
- Parameters
vault –
- Returns
-
property
md5
¶ calculate the md5 hash of the blob
- Returns
md5
-
property
sha1
¶ calculate the sha1 hash of the blob
- Returns
sha1
-
update_hash
(fh, hashobject, buffer_size=65536)[source]¶ A hash represents the object used to calculate a checksum of a string of information.
hashobject = hashlib.md5() df = pd.DataFrame([1,2,3]) url = "/tmp/blob.csv" df.to_csv(url) blob = sdata.Blob(url=url) fh = open(url, "rb") blob.update_hash(fh, hashobject) hashobject.hexdigest()
- Parameters
fh – file handle
hashobject – hash object, e.g. hashlib.sha1()
buffer_size – buffer size (default buffer_size=65536)
- Returns
hashobject
-
property
url
¶ url of the blob
-
-
class
sdata.
Data
(**kwargs)[source]¶ Bases:
object
Base sdata object
-
ATTR_NAMES
= []¶
-
SDATA_CLASS
= '!sdata_class'¶
-
SDATA_CTIME
= '!sdata_ctime'¶
-
SDATA_MTIME
= '!sdata_mtime'¶
-
SDATA_NAME
= '!sdata_name'¶
-
SDATA_PARENT
= '!sdata_parent'¶
-
SDATA_PROJECT
= '!sdata_project'¶
-
SDATA_UUID
= '!sdata_uuid'¶
-
SDATA_VERSION
= '!sdata_version'¶
-
property
asciiname
¶
-
static
clear_folder
(path)[source]¶ delete subfolder in export folder
- Parameters
path – path
- Returns
None
-
copy
()[source]¶ create a copy of the Data object
data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable") datac = data.copy() print("data {0.uuid}".format(data)) print("datac {0.uuid}".format(datac)) print("datac.metadata['!sdata_parent'] {0.value}".format(datac.metadata["sdata_parent"]))
data 38b26864e7794f5182d38459bab85842 datac 2c4eb15900af435d8cd9c8573ca777e2 datac.metadata['!sdata_parent'] 38b26864e7794f5182d38459bab85842
- Returns
Data
-
describe
()[source]¶ Generate descriptive info of the data
df = pd.DataFrame([1,2,3]) data = sdata.Data(name='my name', uuid='38b26864e7794f5182d38459bab85842', table=df, description="A remarkable description") data.describe()
0 metadata 3 table_rows 3 table_columns 1 description 24
- Returns
pd.DataFrame
-
property
description
¶ description of the object
-
property
df
¶ table object (pandas.DataFrame)
-
property
filename
¶
-
classmethod
from_csv
(s=None, filepath=None, sep=';')[source]¶ import sdata.Data from csv
- Parameters
s – csv str
filepath –
sep – separator (default=”;”)
- Returns
sdata.Data
-
classmethod
from_hdf5
(filepath, **kwargs)[source]¶ import sdata.Data from hdf5
- Parameters
filepath –
sep – separator (default=”;”)
- Returns
sdata.Data
-
classmethod
from_json
(s=None, filepath=None)[source]¶ create Data from json str or file
- Parameters
s – json str
filepath –
- Returns
sdata.Data
-
classmethod
from_url
(url=None, stype=None)[source]¶ create Data from a url
- Parameters
url – url
stype – “json” (“xlsx”, “csv”)
- Returns
sdata.Data
-
get_download_link
()[source]¶ Generates a link allowing the data in a given pandas DataFrame to be downloaded in: dataframe out: href string
-
property
group
¶ get group
-
property
name
¶ name of the object
-
property
osname
¶ - Returns
os compatible name (ascii?)
-
property
prefix
¶ prefix of the object name
-
property
project
¶ name of the project
-
refactor
(fix_columns=True, add_table_metadata=True)[source]¶ helper function
to cleanup dataframe column name
to define Attributes for all dataframe columns
-
property
sha3_256
¶ Return a SHA3 hash of the sData object with a hashbit length of 32 bytes.
sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'
- Returns
hashlib.sha3_256.hexdigest()
-
property
sha3_256_table
¶ Return a SHA3 hash of the sData.table object with a hashbit length of 32 bytes.
sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256_table 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'
- Returns
hashlib.sha3_256.hexdigest()
-
property
table
¶ table object (pandas.DataFrame)
-
to_hdf5
(filepath, **kwargs)[source]¶ export sdata.Data to hdf5
- Parameters
filepath –
complib – default=’zlib’ [‘zlib’, ‘lzo’, ‘bzip2’, ‘blosc’, ‘blosc:blosclz’, ‘blosc:lz4’, ‘blosc:lz4hc’, ‘blosc:snappy’, ‘blosc:zlib’, ‘blosc:zstd’]
complevel – default=9 [0-9]
- Returns
-
to_html
(filepath, xlsx=True, style=None)[source]¶ export Data to html
- Parameters
filepath –
xlsx –
style –
- Returns
-
to_json
(filepath=None)[source]¶ export Data in json format
- Parameters
filepath – export file path (default:None)
- Returns
json str
-
tree_folder
(dir, padding=' ', print_files=True, hidden_files=False, last=True)[source]¶ print tree folder structure
-
update_hash
(hashobject)[source]¶ A hash represents the object used to calculate a checksum of a string of information.
data = sdata.Data() md5 = hashlib.md5() data.update_hash(md5) md5.hexdigest() 'bbf323bdcb0bf961803b5504a8a60d69' sha1 = hashlib.sha1() data.update_hash(sha1) sha1.hexdigest() '3c59368c7735c1ecaf03ebd4c595bb6e73e90f0c' hashobject = hashlib.sha3_256() data.update_hash(hashobject).hexdigest() 'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa' data.update_hash(hashobject).digest() b'M8...'
- Parameters
hash – hash object, e.g. hashlib.sha1()
- Returns
hash
-
property
uuid
¶ uuid of the object
-
-
class
sdata.
Schema
(**kwargs)[source]¶ Bases:
sdata.data.Data
Base sdata object
-
ATTR_NAMES
= []¶
-