sif_addition

[3]:
from EduNLP.SIF import is_sif, to_sif,sif4sci

is_sif

[4]:
 # Item text in which every formula is wrapped in $...$ and the blank to fill
 # in is written as $\SIFUnderline$; is_sif reports whether it is valid SIF.
 text = (
     '若$x,y$满足约束条件'
     '$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$,'
     '则$z=x+7 y$的最大值$\\SIFUnderline$'
 )

 is_sif(text)

[4]:
True
[5]:
# Plain text in which the variables y and x are not wrapped in $...$;
# is_sif reports it as not being valid SIF (see the output below).
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
is_sif(text)
[5]:
False

to_sif

[6]:
# Same non-SIF text as in the is_sif example; to_sif returns the text with the
# bare variables wrapped as formulas ($y$, $x$), as the output below shows.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
to_sif(text)
[6]:
'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'

sif4sci

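to_symbolize (passed as the `symbol` argument in the calls below) selects which element types are replaced by placeholder symbols: - “t”: text - “f”: formula - “g”: figure - “m”: question mark - “s”: separator (see the `symbol="gms"` example further down)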

[14]:
 # Sample item containing text, a formula, a blank ($\SIFBlank$) and a figure
 # reference ($\FigureID{1}$); tokenize it with the default settings.
 test_item = r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$"
 t1 = sif4sci(test_item)
 t1
[14]:
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[15]:
# Count the tokens of each type; the keys follow the t/f/g/m legend above.
t1.describe()
[15]:
{'t': 2, 'f': 2, 'g': 1, 'm': 1}
[17]:
# Hide formula ('f'), figure ('g') and question-mark ('m') tokens inside the
# with-block, so only the text tokens are printed.
with t1.filter('fgm'):
    print(t1)
['如图所示', '面积']
[18]:
# Same result for this item, expressed the other way round: keep only the
# text ('t') tokens inside the with-block.
with t1.filter(keep='t'):
    print(t1)
['如图所示', '面积']
[19]:
# With no arguments nothing is filtered out: all tokens are printed.
with t1.filter():
    print(t1)
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[20]:
# Only the text tokens of the item.
t1.text_tokens
[20]:
['如图所示', '面积']
[23]:
# Only the formula tokens of the item.
t1.formula_tokens
[23]:
['\\bigtriangleup', 'ABC']
[24]:
# Only the figure tokens of the item.
t1.figure_tokens
[24]:
[\FigureID{1}]
[25]:
# Only the question-mark tokens (here the $\SIFBlank$ placeholder).
t1.ques_mark_tokens
[25]:
['\\SIFBlank']
[26]:
# Symbolize figures and question marks ('[FIGURE]', '[MARK]' in the output);
# with the "ast" formula method the formula shows up as one Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast"}})
[26]:
['如图所示', <Formula: \bigtriangleup ABC>, '面积', '[MARK]', '[FIGURE]']
[27]:
# Symbolize every element type: the output consists of placeholders only.
sif4sci(test_item, symbol="tfgm")
[27]:
['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']
[28]:
# As the "ast" example above, but with return_type "list" the formula is
# flattened into individual tokens instead of a single Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
[28]:
['如图所示', '\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']
[29]:
 # A multiple-choice item as a dict: the stem ends with the $\SIFChoice$
 # marker and the options are given as a list of formulas.
 test_item_1 = {
     "stem": r"若$x=2$, $y=\sqrt{x}$,则下列说法正确的是$\SIFChoice$",
     "options": [r"$x < y$", r"$y = x$", r"$y < x$"]
  }
[30]:
 # Tokenize the stem and then each option separately, all with the same
 # settings; the result holds one token list per input, stem first.
 # NOTE(review): the formula_params keys are forwarded to EduNLP's formula
 # tokenizer; their exact semantics are not visible in this notebook.
 tls = [
     sif4sci(
         part,
         symbol="gm",
         tokenization_params={
             "formula_params": {
                 "method": "ast",
                 "return_type": "list",
                 "ord2token": True,
                 "var_numbering": True,
                 "link_variable": False,
             },
         },
     )
     for part in [test_item_1["stem"], *test_item_1["options"]]
 ]
[33]:
# One token list per input: the stem first, then each option.
tls
[33]:
[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\sqrt', '说法', '正确', '[MARK]'],
 ['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]
[34]:
# Token lists of the options only (the stem's list at index 0 is skipped).
tls[1:]
[34]:
[['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]
[35]:
from EduNLP.utils import dict2str4sif

# Flatten the dict item into a single SIF string; in the output each field is
# preceded by a $\SIFTag{...}$ marker and the options are joined by $\SIFSep$.
test_item_1_str = dict2str4sif(test_item_1, tag_mode="head", add_list_no_tag=False)
test_item_1_str
[35]:
'$\\SIFTag{stem}$若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[36]:
# Tokenize the flattened item string; tl1 is used for the segment queries in
# the following cells.
tl1 = sif4sci(
    test_item_1_str,
    symbol="gm",
    tokenization_params={
        "formula_params": {"method": "ast", "return_type": "list", "ord2token": True}
    })
[37]:
# First segment: the stem tag.
tl1.get_segments()[0]
[37]:
['\\SIFTag{stem}']
[38]:
# Segments 1 and 2: in the output the text and formula segments are wrapped
# in [TEXT_BEGIN]/[TEXT_END] and [FORMULA_BEGIN]/[FORMULA_END] markers.
tl1.get_segments()[1:3]
[38]:
[['[TEXT_BEGIN]', '[TEXT_END]'],
 ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]
[39]:
# With add_seg_type=False the begin/end markers are omitted; the empty text
# segment seen in the previous output does not appear in this one.
tl1.get_segments(add_seg_type=False)[0:3]
[39]:
[['\\SIFTag{stem}'],
 ['mathord', '=', 'textord'],
 ['mathord', '=', 'mathord', '{ }', '\\sqrt']]
[41]:
# Item that has options only (no stem).
test_item_2 = {"options": [r"$x < y$", r"$y = x$", r"$y < x$"]}
[42]:
# Flatten the options-only item into a single SIF string.
test_item_2_str = dict2str4sif(test_item_2, tag_mode="head", add_list_no_tag=False)
[43]:
# Show the flattened string: an options tag followed by the options, which
# are separated by $\SIFSep$.
test_item_2_str
[43]:
'$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[44]:
# symbol="gms" additionally symbolizes separators: each $\SIFSep$ appears as
# '[SEP]' in the token list shown below.
tl2 = sif4sci(test_item_2_str, symbol="gms",
     tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
tl2
[44]:
['\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']
[45]:
# Segments without type markers; each '[SEP]' forms a segment of its own.
tl2.get_segments(add_seg_type=False)
[45]:
[['\\SIFTag{options}'],
 ['x', '<', 'y'],
 ['[SEP]'],
 ['y', '=', 'x'],
 ['[SEP]'],
 ['y', '<', 'x']]
[46]:
# drop="s" leaves the separator segments out of the result.
tl2.get_segments(add_seg_type=False, drop="s")
[46]:
[['\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]
[47]:
# Tokenize only the stem (symbol="gs") and list its text segments.
tl3 = sif4sci(test_item_1["stem"], symbol="gs")
tl3.text_segments
[47]:
[['说法', '正确']]
[48]:
# Formula segments of the stem, one token list per formula.
tl3.formula_segments
[48]:
[['x', '=', '2'], ['y', '=', '\\sqrt', '{', 'x', '}']]
[49]:
# Figure segments of the stem; the stem contains no figure, so this is empty.
tl3.figure_segments
[49]:
[]
[50]:
# Question-mark segments of the stem (here the $\SIFChoice$ marker).
tl3.ques_mark_segments
[50]:
[['\\SIFChoice']]
[ ]: