sif_addition

[1]:
from EduNLP.SIF import is_sif, to_sif,sif4sci
D:\MySoftwares\Anaconda\envs\data\lib\site-packages\gensim\similarities\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package <https://pypi.org/project/python-Levenshtein/> is unavailable. Install Levenhstein (e.g. `pip install python-Levenshtein`) to suppress this warning.
  warnings.warn(msg)

is_sif

[2]:
# Sample item whose formulas are all wrapped in $...$; is_sif reports True for it below.
text = (
    '若$x,y$满足约束条件'
    '$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$,'
    '则$z=x+7 y$的最大值$\\SIFUnderline$'
)

is_sif(text)

[2]:
True
[3]:
# The variables y and x appear bare (not wrapped in $...$); the recorded result is False.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
is_sif(text)
[3]:
False
[4]:
# With return_parser=True the recorded result is a (bool, Parser) tuple instead of a bare bool.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
is_sif(text, return_parser=True)
[4]:
(False, <EduNLP.SIF.parser.parser.Parser at 0x2a3083fa978>)

to_sif

[5]:
# to_sif rewrites the text; in the recorded result the bare y and x come back as $y$ and $x$.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
to_sif(text)
[5]:
'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'
[6]:
import time

# ------------ Without the "speed-up" mechanism --------------- #
# is_sif() and to_sif() are called independently of each other here.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'*150
start = time.perf_counter()  # monotonic clock, intended for measuring short intervals
# Convert only when the text is not SIF yet; otherwise keep it unchanged, so that
# `siftext` is always bound before it is printed (no NameError for SIF input).
siftext = text if is_sif(text) else to_sif(text)
print("[1]siftext : {} ,consume time [{}s]".format(siftext[:35], time.perf_counter() - start))

# ------------ With the "speed-up" mechanism --------------- #
start = time.perf_counter()
# return_parser=True makes is_sif return a (flag, parser) pair; the parser is then
# handed to to_sif below (presumably so the text need not be parsed a second time).
ret = is_sif(text, return_parser=True)
print("[2]return : ", ret)
# Same fallback as above: never leave a stale `siftext` from the previous run in place.
siftext = text if ret[0] is True else to_sif(text, parser=ret[1])
print("[2]siftext : {} ,consume time [{}s]".format(siftext[:35], time.perf_counter() - start))
[1]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位... ,consume time [0.018142223358154297s]
[2]return :  (False, <EduNLP.SIF.parser.parser.Parser object at 0x000002A30840FC88>)
[2]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位... ,consume time [0.008990764617919922s]

sif4sci

to_symbolize (token type codes, passed via the `symbol` argument of `sif4sci`): “t”: text; “f”: formula; “g”: figure; “m”: question mark

[7]:
# Item containing a formula, a blank marker (\SIFBlank) and a figure reference (\FigureID{1}).
test_item = r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$"
# Tokenize with default settings; the bare `t1` on the last line displays the token list.
t1 = sif4sci(test_item)
t1
[7]:
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[8]:
t1.describe()
[8]:
{'t': 2, 'f': 2, 'g': 1, 'm': 1}
[9]:
# Inside this context the f/g/m token types are hidden: only the text tokens are printed.
with t1.filter('fgm'):
    print(t1)
['如图所示', '面积']
[10]:
# keep='t' retains only the text tokens; the recorded output matches filter('fgm') above.
with t1.filter(keep='t'):
    print(t1)
['如图所示', '面积']
[11]:
# With no arguments nothing is hidden: the recorded output is the full token list.
with t1.filter():
    print(t1)
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[12]:
t1.text_tokens
[12]:
['如图所示', '面积']
[13]:
t1.formula_tokens
[13]:
['\\bigtriangleup', 'ABC']
[14]:
t1.figure_tokens
[14]:
[\FigureID{1}]
[15]:
t1.ques_mark_tokens
[15]:
['\\SIFBlank']
[16]:
# symbol="gm": figure and question-mark tokens are shown as [FIGURE] / [MARK];
# with the "ast" formula method the formula appears as one Formula object in the recorded output.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast"}})
[16]:
['如图所示', <Formula: \bigtriangleup ABC>, '面积', '[MARK]', '[FIGURE]']
[17]:
# symbol="tfgm": every segment is replaced by its placeholder ([TEXT], [FORMULA], [MARK], [FIGURE]).
sif4sci(test_item, symbol="tfgm")
[17]:
['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']
[18]:
# "return_type": "list" yields individual formula tokens instead of a Formula object
# (in the recorded output 'ABC' is split into 'A', 'B', 'C').
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
[18]:
['如图所示', '\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']
[19]:
# Structured item: a "stem" string ending in a \SIFChoice marker plus a list of "options",
# each option being a single formula.
test_item_1 = {
    "stem": r"若$x=2$, $y=\sqrt{x}$,则下列说法正确的是$\SIFChoice$",
    "options": [r"$x < y$", r"$y = x$", r"$y < x$"]
}
[20]:
# Tokenize the stem followed by each option, one sif4sci call per string, using the
# AST formula tokenizer with list output and the ord2token / var_numbering options on.
tls = [
    sif4sci(
        segment,
        symbol="gm",
        tokenization_params={
            "formula_params": {
                "method": "ast",
                "return_type": "list",
                "ord2token": True,
                "var_numbering": True,
                "link_variable": False,
            }
        },
    )
    for segment in [test_item_1["stem"], *test_item_1["options"]]
]
[21]:
tls
[21]:
[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\sqrt', '说法', '正确', '[MARK]'],
 ['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]
[22]:
tls[1:]
[22]:
[['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]
[23]:
from EduNLP.utils import dict2str4sif

# Serialize the dict item into one SIF string. In the recorded output each key becomes a
# \SIFTag{...} head tag and the options are joined with \SIFSep.
test_item_1_str = dict2str4sif(test_item_1, tag_mode="head", add_list_no_tag=False)
test_item_1_str
[23]:
'$\\SIFTag{stem}$若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[24]:
# Tokenize the tagged string; the result is kept in `tl1` for the get_segments() calls below.
tl1 = sif4sci(
    test_item_1_str,
    symbol="gm",
    tokenization_params={
        "formula_params": {"method": "ast", "return_type": "list", "ord2token": True}
    })
[25]:
tl1.get_segments()[0]
[25]:
['\\SIFTag{stem}']
[26]:
tl1.get_segments()[1:3]
[26]:
[['[TEXT_BEGIN]', '[TEXT_END]'],
 ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]
[27]:
# add_seg_type=False: segments come back without the [*_BEGIN] / [*_END] markers
# that the default get_segments() call shows.
tl1.get_segments(add_seg_type=False)[0:3]
[27]:
[['\\SIFTag{stem}'],
 ['mathord', '=', 'textord'],
 ['mathord', '=', 'mathord', '{ }', '\\sqrt']]
[28]:
test_item_2 = {"options": [r"$x < y$", r"$y = x$", r"$y < x$"]}
[29]:
test_item_2_str = dict2str4sif(test_item_2, tag_mode="head", add_list_no_tag=False)
[30]:
test_item_2_str
[30]:
'$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[31]:
# symbol="gms": with "s" included, each \SIFSep shows up as [SEP] in the recorded output.
tl2 = sif4sci(test_item_2_str, symbol="gms",
     tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
tl2
[31]:
['\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']
[32]:
# One token list per segment; each [SEP] appears as a segment of its own.
tl2.get_segments(add_seg_type=False)
[32]:
[['\\SIFTag{options}'],
 ['x', '<', 'y'],
 ['[SEP]'],
 ['y', '=', 'x'],
 ['[SEP]'],
 ['y', '<', 'x']]
[33]:
# drop="s" leaves the [SEP] segments out of the result.
tl2.get_segments(add_seg_type=False, drop="s")
[33]:
[['\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]
[34]:
# Tokenize only the stem with default tokenization; text_segments groups the text tokens
# per segment (the recorded output has a single segment with two tokens).
tl3 = sif4sci(test_item_1["stem"], symbol="gs")
tl3.text_segments
[34]:
[['说法', '正确']]
[35]:
tl3.formula_segments
[35]:
[['x', '=', '2'], ['y', '=', '\\sqrt', '{', 'x', '}']]
[36]:
tl3.figure_segments
[36]:
[]
[37]:
tl3.ques_mark_segments
[37]:
[['\\SIFChoice']]
[ ]: