sif_addition¶
[3]:
from EduNLP.SIF import is_sif, to_sif,sif4sci
is_sif¶
[4]:
text = '若$x,y$满足约束条件' \
'$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$,' \
'则$z=x+7 y$的最大值$\\SIFUnderline$'
is_sif(text)
[4]:
True
[5]:
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
is_sif(text)
[5]:
False
to_sif¶
[6]:
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'
to_sif(text)
[6]:
'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'
sif4sci¶
to_symbolize (the element types that the `symbol` argument replaces with placeholder tokens):
- "t": text
- "f": formula
- "g": figure
- "m": question mark
[14]:
test_item = r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$"
t1 = sif4sci(test_item)
t1
[14]:
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[15]:
t1.describe()
[15]:
{'t': 2, 'f': 2, 'g': 1, 'm': 1}
[17]:
with t1.filter('fgm'):
    print(t1)
['如图所示', '面积']
[18]:
with t1.filter(keep='t'):
    print(t1)
['如图所示', '面积']
[19]:
with t1.filter():
    print(t1)
['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]
[20]:
t1.text_tokens
[20]:
['如图所示', '面积']
[23]:
t1.formula_tokens
[23]:
['\\bigtriangleup', 'ABC']
[24]:
t1.figure_tokens
[24]:
[\FigureID{1}]
[25]:
t1.ques_mark_tokens
[25]:
['\\SIFBlank']
[26]:
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast"}})
[26]:
['如图所示', <Formula: \bigtriangleup ABC>, '面积', '[MARK]', '[FIGURE]']
[27]:
sif4sci(test_item, symbol="tfgm")
[27]:
['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']
[28]:
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
[28]:
['如图所示', '\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']
[29]:
test_item_1 = {
    "stem": r"若$x=2$, $y=\sqrt{x}$,则下列说法正确的是$\SIFChoice$",
    "options": [r"$x < y$", r"$y = x$", r"$y < x$"]
}
[30]:
tls = [
    sif4sci(e, symbol="gm",
            tokenization_params={
                "formula_params": {
                    "method": "ast", "return_type": "list", "ord2token": True, "var_numbering": True,
                    "link_variable": False}
            })
    for e in ([test_item_1["stem"]] + test_item_1["options"])
]
[33]:
tls
[33]:
[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\sqrt', '说法', '正确', '[MARK]'],
['mathord_0', '<', 'mathord_1'],
['mathord_0', '=', 'mathord_1'],
['mathord_0', '<', 'mathord_1']]
[34]:
tls[1:]
[34]:
[['mathord_0', '<', 'mathord_1'],
['mathord_0', '=', 'mathord_1'],
['mathord_0', '<', 'mathord_1']]
[35]:
from EduNLP.utils import dict2str4sif
test_item_1_str = dict2str4sif(test_item_1, tag_mode="head", add_list_no_tag=False)
test_item_1_str
[35]:
'$\\SIFTag{stem}$若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[36]:
tl1 = sif4sci(
    test_item_1_str,
    symbol="gm",
    tokenization_params={
        "formula_params": {"method": "ast", "return_type": "list", "ord2token": True}
    })
[37]:
tl1.get_segments()[0]
[37]:
['\\SIFTag{stem}']
[38]:
tl1.get_segments()[1:3]
[38]:
[['[TEXT_BEGIN]', '[TEXT_END]'],
['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]
[39]:
tl1.get_segments(add_seg_type=False)[0:3]
[39]:
[['\\SIFTag{stem}'],
['mathord', '=', 'textord'],
['mathord', '=', 'mathord', '{ }', '\\sqrt']]
[41]:
test_item_2 = {"options": [r"$x < y$", r"$y = x$", r"$y < x$"]}
[42]:
test_item_2_str = dict2str4sif(test_item_2, tag_mode="head", add_list_no_tag=False)
[43]:
test_item_2_str
[43]:
'$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'
[44]:
tl2 = sif4sci(test_item_2_str, symbol="gms",
              tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})
tl2
[44]:
['\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']
[45]:
tl2.get_segments(add_seg_type=False)
[45]:
[['\\SIFTag{options}'],
['x', '<', 'y'],
['[SEP]'],
['y', '=', 'x'],
['[SEP]'],
['y', '<', 'x']]
[46]:
tl2.get_segments(add_seg_type=False, drop="s")
[46]:
[['\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]
[47]:
tl3 = sif4sci(test_item_1["stem"], symbol="gs")
tl3.text_segments
[47]:
[['说法', '正确']]
[48]:
tl3.formula_segments
[48]:
[['x', '=', '2'], ['y', '=', '\\sqrt', '{', 'x', '}']]
[49]:
tl3.figure_segments
[49]:
[]
[50]:
tl3.ques_mark_segments
[50]:
[['\\SIFChoice']]
[ ]: